├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── Changelog
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── functions.R
    ├── onAttach.R
    ├── pwfe.R
    └── wfe.R
├── README.md
├── man
    ├── pwfe.Rd
    └── wfe.Rd
└── src
    ├── Makevars
    ├── init.c
    ├── vector.c
    ├── vector.h
    ├── wfe.c
    └── wfe.h


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | README.md
2 | README.Rmd
3 | \.git
4 | \.gitignore
5 | \.travis\.yml
6 | \.DS_Store
7 | \.\.Rcheck
8 | builder.sh
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # Example code in package build process
 9 | *-Ex.R
10 | 
11 | # Output files from R CMD build
12 | /*.tar.gz
13 | 
14 | # Output files from R CMD check
15 | /*.Rcheck/
16 | 
17 | # RStudio files
18 | .Rproj.user/
19 | 
20 | # produced vignettes
21 | vignettes/*.html
22 | vignettes/*.pdf
23 | 
24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
25 | .httr-oauth
26 | 
27 | # knitr and R markdown default cache directories
28 | /*_cache/
29 | /cache/
30 | 
31 | # Temporary files created by R markdown
32 | *.utf8.md
33 | *.knit.md
34 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 | 
3 | language: R
4 | sudo: false
5 | cache: packages
6 | 


--------------------------------------------------------------------------------
/Changelog:
--------------------------------------------------------------------------------
 1 | version      date  description
 2 | 0.1    	 12/12/11  The first version on CRAN
 3 | 1.0	     07/17/12  First-difference and difference-in-differences estimator added
 4 | 1.1	     11/14/12  1) Standard errors with degrees of freedom adjustment, 2) missing data issue fixed, 3) issues with pdata.frame resolved
 5 | 1.2	     02/05/13  store.wdm option added
 6 | 1.3	     08/09/14  minor bug fixed
 7 | 1.4	     04/23/16  minor bug fixed
 8 | 1.5	     07/03/16  Implementation of Difference-in-Differences with nearest neighbor matching on pre-treatment outcome variables
 9 | 1.6	     07/18/17  standard error with degress of freedom correction (two-way)
10 | 1.7	     11/25/18  standard error with degress of freedom correction (one-way)
11 | 1.8	     01/09/19  minor update using cbind() and rbind() instead of cBind() and rBind()
12 | 1.9.1     02/13/19  White statistics degress of freedom correction and cleanup
13 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: wfe
 2 | Type: Package
 3 | Title: Weighted Linear Fixed Effects Regression Models for Causal Inference
 4 | Version: 1.9.1
 5 | Date: 2019-02-13
 6 | Authors@R: c(
 7 |   person("In Song", "Kim", email = "insong@mit.edu", role = c("aut", "cre")),
 8 |   person("Kosuke", "Imai", email = "imai@Harvard.edu", role = c("aut"))
 9 |   )
10 | Description: Provides a computationally efficient way of fitting
11 | 	     weighted linear fixed effects estimators for causal
12 | 	     inference with various weighting schemes. Weighted linear
13 | 	     fixed effects estimators can be used to estimate the
14 | 	     average treatment effects under different identification
15 | 	     strategies. This includes stratified randomized
16 | 	     experiments, matching and stratification for
17 | 	     observational studies, first differencing, and
18 | 	     difference-in-differences. The package implements methods
19 | 	     described in Imai and Kim (2017) "When should We Use
20 | 	     Linear Fixed Effects Regression Models for Causal
21 | 	     Inference with Longitudinal Data?", available at
22 | 	     <https://imai.fas.harvard.edu/research/FEmatch.html>.
23 | License: GPL(>= 2)
24 | Imports:
25 |     utils,
26 |     arm,
27 |     Matrix,
28 |     MASS,
29 |     methods
30 | Depends: R (>= 3.2.0)
31 | Encoding: UTF-8
32 | LazyData: true
33 | BugReports: https://github.com/insongkim/wfe/issues
34 | 
35 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | useDynLib(wfe, .registration = TRUE, .fixes = "C_")
 2 | export(wfe, pwfe)
 3 | 
 4 | ## importFrom(Rcpp, evalCpp)
 5 | ## exportPattern("^[[:alpha:]]+")
 6 | 
 7 | importFrom(arm, bayesglm)
 8 | import(Matrix, MASS)
 9 | 
10 | S3method(summary, wfe)
11 | S3method(print, wfe)
12 | S3method(print, summary.wfe)
13 | 
14 | S3method(summary, pwfe)
15 | S3method(print, pwfe)
16 | S3method(print, summary.pwfe)
17 | 
18 | S3method(summary, wfedid)
19 | S3method(print, wfedid)
20 | S3method(print, summary.wfedid)
21 | 
22 | 
23 | importFrom("methods", "as")
24 | importFrom("stats", "as.formula", "binomial", "coef", "fitted", "lm",
25 |            "model.frame", "model.matrix", "model.response", "na.omit",
26 |            "pchisq", "printCoefmat", "pt", "resid", "vcov")
27 | importFrom("utils", "flush.console", "object.size",
28 |            "packageDescription")
29 | 


--------------------------------------------------------------------------------
/R/functions.R:
--------------------------------------------------------------------------------
  1 | 
  2 | Index <- function(index_name, uniq_index_name, len_u_index, len_data) {
  3 | 
  4 |   return(.C("Index", as.integer(index_name), as.integer(uniq_index_name), as.integer(len_u_index),
  5 |             as.integer(len_data),
  6 |             result = integer(len_data))$result)
  7 | 
  8 | }
  9 | 
 10 | 
 11 | 
 12 | VectorizeC <- function(W, time.index, dyad.index, n.row) {
 13 | 
 14 |   return(.C("VectorizeC", as.double(W), as.integer(nrow(W)), as.integer(ncol(W)),
 15 |             as.integer(time.index), as.integer(dyad.index), as.integer(n.row),
 16 |             results = double(n.row))$results)
 17 | 
 18 | }
 19 | 
 20 | 
 21 | 
 22 | Transform <- function(y, treat, pscore) {
 23 | 
 24 |   return(.C("Transform", as.double(y), as.integer(length(y)), as.integer(treat),
 25 |             as.double(pscore), ytrans = double(length(y)))$ytrans)
 26 | 
 27 | }
 28 | 
 29 | 
 30 | 
 31 | GenTime <- function(unit_index, len_data, len_u_index) {
 32 |   return(.C("GenTime", as.integer(unit_index), as.integer(len_data),
 33 |             as.integer(len_u_index), time_index = double(len_data))$time_index)
 34 | }
 35 | 
 36 | 
 37 | 
 38 | GenWeightsUnit <- function(unit_index, time_index, tr, C_it, len_data, len_u_index, len_t_index, ate, att, size, verbose) {
 39 |   return(.C("GenWeightsUnit", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it),
 40 |             as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index),
 41 |             as.integer(ate), as.integer(att), as.integer(verbose),
 42 |             weight = double(len_u_index*len_t_index))$weight)
 43 | }
 44 | 
 45 | 
 46 | GenWeightsTime <- function(time_index, unit_index, tr, C_it, len_data, len_t_index, len_u_index, ate, att, size, verbose) {
 47 |   return(.C("GenWeightsTime", as.integer(time_index), as.integer(unit_index), as.integer(tr), as.integer(C_it),
 48 |             as.integer(len_data), as.integer(len_t_index), as.integer(len_u_index),
 49 |             as.integer(ate), as.integer(att), as.integer(verbose),
 50 |             weight = double(len_u_index*len_t_index))$weight)
 51 | }
 52 | 
 53 | 
 54 | 
 55 | 
 56 | 
 57 | GenWeightsFD <- function(unit_index, time_index, tr, C_it, len_data, len_u_index, len_t_index, ate, att, verbose) {
 58 |   return(.C("GenWeightsFD", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it),
 59 |             as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index),
 60 |             as.integer(ate), as.integer(att), as.integer(verbose),
 61 |             weightfd = double(len_u_index*len_t_index))$weightfd)
 62 | }
 63 | 
 64 | 
 65 | CalDID <- function(unit_index, time_index, tr, C_it, y, len_data, len_u_index, len_t_index, ate, att, verbose) {
 66 |   return(.C("CalDID", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it),
 67 |             as.double(y), as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index),
 68 |             as.integer(ate), as.integer(att), as.integer(verbose),
 69 |             did = double(1))$did)
 70 | }
 71 | 
 72 | 
 73 | 
 74 | DemeanDID <- function(var_name, weight, unit_index, time_index, len_u_index, len_t_index, len_data) {
 75 |   return(.C("DemeanDID", as.double(var_name), as.double(weight), 
 76 |             as.integer(unit_index), as.integer(time_index),
 77 |             as.integer(len_u_index), as.integer(len_t_index), as.integer(len_data),
 78 |             DemeanDID = double(len_data))$DemeanDID)
 79 | }
 80 | 
 81 | 
 82 | GenWeightsMDID <- function(unit_index, time_index, tr, C_it, y, maxdev.did, len_data,
 83 |                            len_u_index, len_t_index, ate, att, verbose) {
 84 |     return(.C("GenWeightsMDID", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it),
 85 |               as.double(y), as.double(maxdev.did), as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index),
 86 |               as.integer(ate), as.integer(att), as.integer(verbose),
 87 |               weightmdid = double(len_u_index*len_t_index))$weightmdid)
 88 | }
 89 | 
 90 | 
 91 | 
 92 | GenWeightsDID <- function(unit_index, time_index, tr, C_it, len_data, len_u_index, len_t_index, ate, att, verbose) {
 93 |   return(.C("GenWeightsDID", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it),
 94 |             as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index),
 95 |             as.integer(ate), as.integer(att), as.integer(verbose),
 96 |             weightdid = double(len_u_index*len_t_index))$weightdid)
 97 | }
 98 | 
 99 | Demean <- function(var_name, index, len_index, len_data) {
100 |   return(.C("Demean", as.double(var_name), as.integer(index), as.integer(len_index),
101 |             as.integer(len_data), demean = double(len_data))$demean)
102 | }
103 | 
104 | 
105 | 
106 | 
107 | WDemean <- function(var_name, weight, index, len_index, len_data) {
108 |   return(.C("WDemean", as.double(var_name), as.double(weight), as.integer(index), as.integer(len_index),
109 |             as.integer(len_data), Wdemean = double(len_data))$Wdemean)
110 | }
111 | 
112 | 
113 | 
114 | 
115 | WWDemean <- function(var_name, weight, index, len_index, len_data) {
116 |   return(.C("WWDemean", as.double(var_name), as.double(weight), as.integer(index), as.integer(len_index),
117 |             as.integer(len_data), WWdemean = double(len_data))$WWdemean)
118 | }
119 | 
120 | 
121 | 
122 | TwayDemean <- function(var_name, unit_index, time_index, len_u_index, len_t_index, len_data) {
123 |   return(.C("TwayDemean", as.double(var_name), as.integer(unit_index), as.integer(time_index),
124 |             as.integer(len_u_index), as.integer(len_t_index), as.integer(len_data),
125 |             TwayDemean = double(len_data))$TwayDemean)
126 | }
127 | 
128 | 
129 | 
130 | MDummy <-  function(index, len_index, len_data) {
131 |   return(.C("MDummy", as.integer(index), as.integer(len_index), as.integer(len_data),
132 |             dummy = integer(len_data*len_index))$dummy)
133 | }
134 | 
135 | 
136 | 
137 | 
138 | XXiSum <-  function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde) {
139 |   return(.C("XXiSum", as.integer(len_data), as.integer(n_cov),
140 |             as.integer(unit_index), as.integer(len_uniq_unit_index),
141 |             as.double(X.tilde),
142 |             result = double(n_cov*n_cov))$result)
143 | }
144 | 
145 | 
146 | 
147 | XWXiSum <-  function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, weights) {
148 |   return(.C("XWXiSum", as.integer(len_data), as.integer(n_cov),
149 |             as.integer(unit_index), as.integer(len_uniq_unit_index),
150 |             as.double(X.tilde), as.double(weights),
151 |             result = double(n_cov*n_cov))$result)
152 | }
153 | 
154 | 
155 | OmegaHatHAC <-  function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde) {
156 |   return(.C("OmegaHatHAC", as.integer(len_data), as.integer(n_cov),
157 |             as.integer(unit_index), as.integer(len_uniq_unit_index),
158 |             as.double(X.tilde), as.double(u.tilde),
159 |             Omega_hat_HAC = double(n_cov*n_cov))$Omega_hat_HAC)
160 | }
161 | 
162 | 
163 | OmegaHatHC <-  function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde) {
164 |   return(.C("OmegaHatHC", as.integer(len_data), as.integer(n_cov),
165 |             as.integer(unit_index), as.integer(len_uniq_unit_index),
166 |             as.double(X.tilde), as.double(u.tilde),
167 |             Omega_hat_HC = double(n_cov*n_cov))$Omega_hat_HC)
168 | }
169 | 
170 | 
171 | 
172 | 
173 | OmegaDiDHAC <-  function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde, W) {
174 |   return(.C("OmegaDiDHAC", as.integer(len_data), as.integer(n_cov),
175 |             as.integer(unit_index), as.integer(len_uniq_unit_index),
176 |             as.double(X.tilde), as.double(u.tilde), as.double(W),
177 |             Omega_DiD_HAC = double(n_cov*n_cov))$Omega_DiD_HAC)
178 | }
179 | 
180 | 
181 | OmegaDiDHAC2 <-  function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde, W) {
182 |   return(.C("OmegaDiDHAC2", as.integer(len_data), as.integer(n_cov),
183 |             as.integer(unit_index), as.integer(len_uniq_unit_index),
184 |             as.double(X.tilde), as.double(u.tilde), as.double(W),
185 |             Omega_DiD_HAC = double(n_cov*n_cov))$Omega_DiD_HAC)
186 | }
187 | 
188 | 
189 | 
190 | OmegaDiDHAC <-  function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde, W) {
191 |   return(.C("OmegaDiDHAC", as.integer(len_data), as.integer(n_cov),
192 |             as.integer(unit_index), as.integer(len_uniq_unit_index),
193 |             as.double(X.tilde), as.double(u.tilde), as.double(W),
194 |             Omega_DiD_HAC = double(n_cov*n_cov))$Omega_DiD_HAC)
195 | }
196 | 
197 | 
198 | 
199 | 
200 | ProjectionM <-  function(Q_QQinv, Q, P1.first, P1.second,
201 |                          P1.third, P1.fourth, P1.fifth,
202 |                          Y, X, len_data, n_col, n_var,
203 |                          n_p1, n_p2, n_p3, n_p4){
204 |   return(.C("ProjectionM", as.complex(Q_QQinv), as.complex(Q),
205 |             as.complex(P1.first), as.complex(P1.second),
206 |             as.complex(P1.third), as.complex(P1.fourth), as.complex(P1.fifth),
207 |             as.complex(Y), as.complex(X),
208 |             as.integer(len_data), as.integer(n_col),
209 |             as.integer(n_var), as.integer(n_p1),
210 |             as.integer(n_p2), as.integer(n_p3), as.integer(n_p4),
211 |             Projection = complex(len_data*n_var))$Projection)
212 | }
213 | 
214 | 
215 | 
216 | comp_OmegaHAC <-  function(X_1, u_1, X_2, u_2, len_data,
217 |                            n_cov, unit_index, len_unit){
218 |   return(.C("comp_OmegaHAC", as.complex(X_1), as.complex(u_1),
219 |             as.complex(X_2), as.complex(u_2), as.integer(len_data),
220 |             as.integer(n_cov), as.integer(unit_index), as.integer(len_unit),
221 |             OmegaHAC = complex(n_cov*n_cov))$OmegaHAC)
222 | }
223 | 
224 | 
225 | 
226 | comp_OmegaHC <-  function(X_1, u_1, X_2, u_2, len_data,
227 |                           n_cov, unit_index, len_unit){
228 |   return(.C("comp_OmegaHC", as.complex(X_1), as.complex(u_1),
229 |             as.complex(X_2), as.complex(u_2), as.integer(len_data),
230 |             as.integer(n_cov), as.integer(unit_index), as.integer(len_unit),
231 |             OmegaHC = complex(n_cov*n_cov))$OmegaHC)
232 | }
233 | 
234 | 
235 | 
236 | LamdaDID1 <-  function(len_Xtrow, len_Xhrow, Tunit_index, len_uniq_Tunit_index,
237 |                        Hunit_index, len_uniq_Hunit_index,
238 |                        X.tilde, len_Xtcol, u.tilde,
239 |                        X.hat, len_Xhcol, u.hat, W) {
240 |   return(.C("LamdaDID1", as.integer(len_Xtrow), as.integer(len_Xhrow),
241 |             as.integer(Tunit_index), as.integer(len_uniq_Tunit_index),
242 |             as.integer(Hunit_index), as.integer(len_uniq_Hunit_index),
243 |             as.double(X.tilde), as.integer(len_Xtcol), as.double(u.tilde),
244 |             as.double(X.hat), as.integer(len_Xhcol), as.double(u.hat), as.double(W),
245 |             LamdaDID1 = double(len_Xhcol*len_Xtcol))$LamdaDID1)
246 | }
247 | 
248 | 
249 | 
250 | LamdaDID2 <-  function(len_Xtrow, len_Xhrow, Tunit_index, len_uniq_Tunit_index,
251 |                        Hunit_index, len_uniq_Hunit_index,
252 |                        X.tilde, len_Xtcol, u.tilde,
253 |                        X.hat, len_Xhcol, u.hat, W) {
254 |   return(.C("LamdaDID2", as.integer(len_Xtrow), as.integer(len_Xhrow),
255 |             as.integer(Tunit_index), as.integer(len_uniq_Tunit_index),
256 |             as.integer(Hunit_index), as.integer(len_uniq_Hunit_index),
257 |             as.double(X.tilde), as.integer(len_Xtcol), as.double(u.tilde),
258 |             as.double(X.hat), as.integer(len_Xhcol), as.double(u.hat), as.double(W),
259 |             LamdaDID2 = double(len_Xtcol*len_Xhcol))$LamdaDID2)
260 | }
261 | 
262 | 
263 | 
264 | 
265 | 
266 | Wdemean <- function (x, w, unit.index, time.index, data) {
267 |   data$u.index <- Index(data[,unit.index])
268 |   data$t.index <- Index(data[,time.index])
269 |   uniq.unit <- unique(data$u.index)
270 |   uniq.time <- unique(data$t.index)
271 |   wdemean.x <- c()
272 |   wdemean.x <- new.tr.x <- matrix(NA, nrow = length(uniq.time), ncol = length(uniq.unit))
273 |   for (i in 1:length(uniq.unit)){
274 |     sub.i <- data[data$u.index == uniq.unit[i], ]
275 |     nr <- nrow(sub.i)
276 |     denom <- as.numeric(sum(sub.i[,w]))
277 |     x.star <- as.numeric((sub.i[,x]%*%sub.i[,w])/(denom))
278 |     tr.x <- as.vector(sub.i[,x] - rep(x.star, nr))
279 |     wdemean.x[,i] <- tr.x
280 |       # sqrt(w)* (weighted demean) for SE
281 |       for (j in 1:length(uniq.time)){
282 |        new.tr.x[j,i] <- sqrt(sub.i[,w][j])*(sub.i[,x][j] - x.star)
283 |       }
284 |   }
285 |   w.demean <- as.vector(wdemean.x)
286 |   w.tr.x <- as.vector(new.tr.x)
287 |   list(w.demeaned = wdemean.x, new.tr.x = w.tr.x)
288 | }
289 | 
290 | 
291 | 
292 | 
293 | ## a function that checks memory usage
294 | 
295 | .ls.objects <- function (pos = 1, pattern, order.by = "Size", decreasing=TRUE, head = TRUE, n = 10) {
296 |     # based on postings by Petr Pikal and David Hinds to the r-help list in 2004
297 |     # modified by: Dirk Eddelbuettel (http://stackoverflow.com/questions/1358003/tricks-to-manage-the-available-memory-in-an-r-session)
298 |     # I then gave it a few tweaks (show size as megabytes and use defaults that I like)
299 |     # a data frame of the objects and their associated storage needs.
300 |     napply <- function(names, fn) sapply(names, function(x)
301 |                                                    fn(get(x, pos = pos)))
302 |       names <- ls(pos = pos, pattern = pattern)
303 |       obj.class <- napply(names, function(x) as.character(class(x))[1])
304 |       obj.mode <- napply(names, mode)
305 |       obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class)
306 |       obj.size <- napply(names, object.size) / 10^6 # megabytes
307 |       obj.dim <- t(napply(names, function(x)
308 |                                       as.numeric(dim(x))[1:2]))
309 |       vec <- is.na(obj.dim)[, 1] & (obj.type != "function")
310 |       obj.dim[vec, 1] <- napply(names, length)[vec]
311 |       out <- data.frame(obj.type, obj.size, obj.dim)
312 |       names(out) <- c("Type", "Size", "Rows", "Columns")
313 |       out <- out[order(out[[order.by]], decreasing=decreasing), ]
314 |       if (head)
315 |             out <- head(out, n)
316 |       out
317 |   }
318 | 
319 | 
320 | ## ### wfe objects for xtable
321 | 
322 | ## xtable.wfe <- function(x,caption=NULL,label=NULL,align=NULL,
323 | ##                       digits=NULL,display=NULL,...) {
324 | ##   return(xtable.summary.wfe(summary(x),caption=caption,label=label,
325 | ##                            align=align, digits=digits,display=display))
326 | ## }
327 | 
328 | ## xtable.summary.wfe <- function(x,caption=NULL,label=NULL,align=NULL,
329 | ##                               digits=NULL,display=NULL,...) {
330 | ##   x <- data.frame(x$coef,check.names=FALSE)
331 | 
332 | ##   class(x) <- c("xtable","data.frame")
333 | ##   caption(x) <- caption
334 | ##   label(x) <- label
335 | ##   align(x) <- switch(1+is.null(align),align,c("r","r","r","r","r"))
336 | ##   digits(x) <- switch(1+is.null(digits),digits,c(0,4,4,2,4))
337 | ##   display(x) <- switch(1+is.null(display),display,c("s","f","f","f","f"))
338 | ##   return(x)
339 | ## }
340 | 
341 | 


--------------------------------------------------------------------------------
/R/onAttach.R:
--------------------------------------------------------------------------------
1 | ".onAttach" <- function(lib, pkg) {
2 |   mylib <- dirname(system.file(package = pkg))
3 |   title <- packageDescription(pkg, lib.loc = mylib)$Title
4 |   ver <- packageDescription(pkg, lib.loc = mylib)$Version
5 |   packageStartupMessage(paste(pkg, ": ", title, "\nVersion: ", ver, "\n", sep=""))
6 | }
7 | 


--------------------------------------------------------------------------------
/R/pwfe.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | pwfe <- function (formula, treat = "treat.name", outcome, data, pscore = NULL,
  4 |                   unit.index, time.index = NULL, method = "unit", within.unit = TRUE,
  5 |                   qoi = c("ate", "att"), estimator = NULL, C.it = NULL,
  6 |                   White = TRUE, White.alpha = 0.05,
  7 |                   hetero.se = TRUE, auto.se = TRUE, unbiased.se = FALSE,
  8 |                   verbose = TRUE) {
  9 | 
 10 | 
 11 |   pwfe.call <- match.call()
 12 |   tn.row <- nrow(data) # total number of rows in data
 13 | 
 14 | 
 15 | 
 16 | ### Warnings
 17 |   ## Warning for missing unit & time index
 18 |   if (missing(unit.index))
 19 |     stop("'unit.index' or index for strata should be provided")
 20 | 
 21 |   if (is.null(time.index) & method == "time")
 22 |     stop("'time.index' should be provided")
 23 | 
 24 |   ## Warning for methods
 25 |   if(method=="time" && !is.null(estimator) && estimator == "fd")
 26 |     stop("First Difference is not compatible with 'time' method")
 27 | 
 28 |   if(is.null(time.index) && !is.null(estimator) && estimator  == "fd")
 29 |     stop("First Difference cannot calculate when 'time.index' is missing")
 30 | 
 31 |   if(is.null(time.index) && !is.null(estimator) && estimator  == "did")
 32 |     stop("Difference-in-Differences is not compatible with pwfe")
 33 | 
 34 |   ## Warning for C.it
 35 |   if (!is.null(C.it)){
 36 |     Cit <- data[,C.it]
 37 |     if (!is.numeric(Cit) && length(Cit)!= tn.row)
 38 |       stop("'C.it' must be a numeric vector with length equal to number of observations")
 39 |     if ( sum(Cit < 0) > 0 )
 40 |       stop("'C.it' must be a non-negative numeric vector")
 41 |   }
 42 | 
 43 |   ## Waring for pscore (if propensity score is provided by researcher)
 44 |   p.score <- data[,pscore]
 45 |   if (!is.null(pscore) && !is.numeric(p.score) && length(p.score)!= tn.row)
 46 |     stop("'pscore' must be a numeric vector with length equal to number of observations")
 47 |   if (!is.null(pscore)) {
 48 |     if (!((0 < p.score ) && (p.score < 1)))
 49 |       stop("'pscore' must be a bounded away from zero and one")
 50 |   }
 51 |   if (!is.null(pscore) && !missing(formula))
 52 |     stop("'formula' should not be provided when pscore is specified")
 53 | 
 54 | 
 55 | 
 56 | 
 57 |   ## cat("warnings done:\n")
 58 |   ## C.it
 59 |   ## Default for ATE
 60 |   if (is.null(C.it)){
 61 |     data$C.it <- as.integer(rep(1, nrow(data)))
 62 |   }
 63 | 
 64 |   ## White.alpha
 65 |   if (is.null(White.alpha)){
 66 |     White.alpha <- 0.05
 67 |   } else {
 68 |     White.alpha <- White.alpha
 69 |   }
 70 | 
 71 |   ## Warning for binary treatment
 72 | 
 73 |   ## treatment variable
 74 |   data$TR <- as.numeric(data[,treat])
 75 | 
 76 |   if (length(unique(data$TR)) !=2 ){
 77 |     stop("'treat' must be a binary vector")
 78 |   }
 79 |   if (sum(unique(data$TR)) !=1) {
 80 |     stop("'treat' must be either 0 or 1 where 1 indicates treated")
 81 |   }
 82 | 
 83 | 
 84 |   
 85 | ### Unit and Time index
 86 |   ## Creating time index for strata fixed effect analysis
 87 | 
 88 | 
 89 |   ## unit index
 90 |   
 91 |   numeric.u.index <- as.numeric(as.factor(data[,unit.index]))
 92 |   numeric.u.index[is.na(numeric.u.index)] <- 0
 93 |   ## handling missing unit index
 94 |   uniq.u <- unique(na.omit(numeric.u.index))
 95 |   uniq.u <- sort(uniq.u[!(uniq.u %in% 0)])
 96 |   J.u <- length(uniq.u)
 97 | 
 98 |   
 99 |   data$u.index <- Index(numeric.u.index, uniq.u, J.u, tn.row)
100 | 
101 |   ## time index
102 |   if (is.null(time.index)) {
103 |     data$t.index <- GenTime(data$u.index, tn.row, length(uniq.u))
104 |     numeric.t.index <- as.numeric(as.factor(data$t.index))
105 |   } else {
106 |     numeric.t.index <- as.numeric(as.factor(data[,time.index]))
107 |     numeric.t.index[is.na(numeric.t.index)] <- 0
108 |     ## handling missing time index
109 |     uniq.t <- unique(na.omit(numeric.t.index))
110 |     uniq.t <- sort(uniq.t[!(uniq.t %in% 0)])
111 |     ## needs to sort for unbalnced panel, See Index()
112 |     J.t <- length(uniq.t)
113 |     data$t.index <- Index(numeric.t.index, uniq.t, J.t, tn.row)
114 |   }
115 |   uniq.t <- unique(data$t.index)
116 | 
117 |   
118 | 
119 |   ## unique unit number and sorting data
120 |   if (method == "unit"){
121 |     unit.number <- length(uniq.u)
122 |   } else if (method == "time"){
123 |     unit.number <- length(uniq.t)
124 |   } else {
125 |     stop("method should be either unit or time")
126 |   }
127 | 
128 |   if (verbose)
129 |     cat(" \nNumber of unique", method, "is", unit.number, "\n")
130 | 
131 | 
132 |   ## order data 
133 |   data <- data[order(data$u.index, data$t.index),]
134 | 
135 |   ## saving unit index for each unit
136 |   name.unit <- unique(data[,unit.index])
137 |   number.unit <- unique(numeric.u.index)
138 | 
139 |   ## saving time index for each time
140 |   name.time <- unique(data[,time.index])
141 |   number.time <- unique(numeric.t.index)
142 | 
143 |   
144 |   
145 |   ## quantity of interest: qoi
146 | 
147 |   if (missing(qoi)){
148 |     causal <- "ate"
149 |     ate.n <- 1
150 |   }
151 | 
152 |   ate.n <-  att.n <- 0
153 |   if (qoi == "ate"){
154 |     causal <- "ATE (Average Treatment Effect)"
155 |     ate.n <- 1
156 |   }
157 |   if (qoi == "att"){
158 |     causal <- "ATT (Average Treatment Effect for the Treated)"
159 |     att.n <- 1
160 |   }
161 | 
162 |   if (is.null(estimator)) {
163 |     est <- "NULL"
164 |   }
165 |   if (!is.null(estimator) && estimator == "fd"){
166 |     est <- "FD (First-Difference)"
167 |   }
168 |   if (!is.null(estimator) && estimator == "did"){
169 |     est <- "DID (Difference-in-Differences)"
170 |   }
171 | 
172 | 
173 | ### propensity score estimation
174 | 
175 |   if (!is.null(pscore)) {
176 |     data$y.star <- Transform(data[,outcome], data[,treat], data[,pscore])
177 |   }
178 |   if (is.null(pscore)) {
179 | 
180 |     ## formula for propensity score estimation
181 |     f.left <- as.character(treat)
182 |     f.right <- unlist(strsplit(as.character(formula), "~"))[2]
183 |     c.formula <- paste(f.left,"~ -1 + ", f.right)
184 |     
185 |     
186 |     ps.formula <- as.formula(c.formula) # formula for propensity score estimation
187 | 
188 | 
189 |     data$index <- 1:nrow(data) # indexing for saving y.star
190 |     
191 |     if (verbose){
192 |       cat("\nPropensity Score estimation started based on the following model:\n", c.formula, "\n")
193 |     }
194 |     flush.console()
195 |     ## transform Y_{it}
196 |     if (method=="time" & within.unit == TRUE){
197 |       ## regression for propensity score estimation is run within unit
198 |       Data.ps <- c()
199 |       for (j in 1:length(uniq.t)){
200 |         ## print(j)
201 |         if (verbose){
202 |           cat(".")
203 |           flush.console()
204 |         }
205 | 
206 |         pool <- data[data$t.index == uniq.t[j], ] # subset data by time j
207 |         n.pool <- nrow(pool) # gives total number of units for the year j
208 |         n.treat <- sum(pool[,treat]) # number of treated units in a given year t
209 |         n.cont <- sum(1-pool[,treat]) # number of controlled units in a given year t
210 |         ## estimate propensity score within each time t
211 |         fit.ps <- bayesglm(ps.formula, family=binomial(link="logit"),
212 |                            data = pool)
213 |         pool$fitted.ps <- fitted(fit.ps) # save the estimated propensity score
214 |         pool.u.index <- unique(pool$u.index)
215 |         pool$ystar <- Transform(pool[,outcome], pool[,treat], pool$fitted.ps)
216 |         Data.ps <- rbind(Data.ps, pool)
217 |       }
218 |       data$y.star[Data.ps$index] <- Data.ps$ystar
219 | 
220 |     }
221 | 
222 |     if (method=="unit" & within.unit == TRUE) {
223 |       ## regression for propensity score estimation is run within unit
224 |       Data.ps <- c()
225 |       for (i in 1:length(uniq.u)){
226 | 
227 |         if (verbose){
228 |           if (i %% 100 == 0)
229 |             cat(".")
230 |           flush.console()
231 |         }
232 | 
233 |         pool <- data[data$u.index == uniq.u[i], ] # subset data by unit i
234 |         n.pool <- nrow(pool) # gives total number of times for the unit i
235 |         n.treat <- sum(pool[,treat]) # number of treated times in a given unit i
236 |         n.cont <- sum(1-pool[,treat]) # number of controlled times in a given unit i
237 |         ## estimate propensity score within each time t
238 |         fit.ps <- bayesglm(ps.formula, family=binomial(link="logit"), data
239 |                            = pool)
240 |         pool$fitted.ps <- fitted(fit.ps) # save the estimated propensity score
241 |         pool.t.index <- unique(pool$t.index)
242 |         pool$ystar <- Transform(pool[,outcome], pool[,treat], pool$fitted.ps)
243 |         Data.ps <- rbind(Data.ps, pool)
244 |       }
245 |       data$y.star[Data.ps$index] <- Data.ps$ystar
246 | 
247 |     }
248 | 
249 |     
250 | 
251 |     ## transform Y_{it}
252 |     if (within.unit == FALSE){
253 |       ## regression for propensity score estimation is on entire data
254 |       n.treat <- sum(data[,treat]) # number of treated units 
255 |       n.cont <- sum(1-data[,treat]) # number of controlled units 
256 |       ## estimate propensity score within each time t
257 |       fit.ps <- bayesglm(ps.formula, family=binomial(link="logit"),
258 |                          data = data)
259 |       data$fitted.ps <- fitted(fit.ps) # save the estimated propensity score
260 |       data$y.star <- Transform(data[,outcome], data[,treat], data$fitted.ps)
261 |     }
262 | 
263 |   }
264 |   
265 |   if (verbose)
266 |     cat("\nPropensity Score estimation done \n")
267 |   flush.console()
268 |   
269 | 
270 | 
271 |   if (verbose)
272 |     cat("\nWeight calculation started ")
273 |   flush.console()
274 |   ## Unit fixed effects models
275 |   if ( (method=="unit" & qoi=="ate" & is.null(estimator) ) | (method=="unit" & qoi=="att" & is.null(estimator)) ) {
276 |     W <- GenWeightsUnit(data$u.index, data$t.index, data$TR, data$C.it, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, length(uniq.u)*length(uniq.t), verbose)
277 |     W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T)
278 |     data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row)
279 |   }
280 | 
281 |   ## Time fixed effects models
282 |   if ( (method=="time" & qoi=="ate" & is.null(estimator) ) | (method=="time" & qoi=="att" & is.null(estimator)) ) {
283 |     W <- GenWeightsTime(data$t.index, data$u.index, data$TR, data$C.it, tn.row, length(uniq.t), length(uniq.u), ate.n, att.n, length(uniq.t)*length(uniq.u), verbose)
284 |     W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T)
285 |     data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row)
286 |   }
287 | 
288 | 
289 |   ## Within Unit First Difference
290 |   if(( (method=="unit") && (qoi == "ate") && (!is.null(estimator) && estimator == "fd")) | ((method == "unit") && (qoi =="att") && (!is.null(estimator) && estimator == "fd"))) {
291 |     W <- GenWeightsFD(data$u.index, data$t.index, data$TR, data$C.it, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose)
292 |     W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T)
293 |     data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row)
294 |   }
295 |   if (verbose)
296 |     cat(" Weight calculation done \n")
297 |   ## print(Sys.time())
298 |   flush.console()
299 |   ##cat("weights:", print(W), "\n")
300 | 
301 | 
302 |   ## e <- environment()
303 |   ## save(file = "temp.RData", list = ls(), env = e)
304 | 
305 | 
306 |   Data.final <- as.data.frame(cbind(data$y.star, data$TR, data$W.it, data$u.index, data$t.index))
307 |   colnames(Data.final) <- c("y.star", "TR", "W.it", "u.index", "t.index")
308 | 
309 | 
310 | 
311 |   ## Creating the matrix for final analysis
312 |   Data.dm <- as.data.frame(matrix(NA, ncol = 2, nrow = nrow(Data.final)))
313 |   Data.wdm <- as.data.frame(matrix(NA, ncol = 2, nrow = nrow(Data.final)))
314 | 
315 | 
316 |   ## column names
317 |   ## print(colnames(Data.final))
318 |   colnames(Data.dm) <- colnames(Data.wdm) <- c("dep.var", "treat")
319 | 
320 |   ## de-meaning for fast calculation
321 |   for (k in 1:2){
322 |     if (method == "unit"){
323 |       w.wdemean <- WWDemean(Data.final[,k], Data.final$W.it, Data.final$u.index, unit.number, tn.row)
324 |       demean <- Demean(Data.final[,k], Data.final$u.index, unit.number, tn.row)
325 |     }
326 |     if (method == "time"){
327 |       w.wdemean <- WWDemean(Data.final[,k], Data.final$W.it, Data.final$t.index, unit.number, tn.row)
328 |       demean <- Demean(Data.final[,k], Data.final$u.index, unit.number, tn.row)
329 |     }
330 |     Data.wdm[,k] <- as.vector(w.wdemean)
331 |     Data.dm[,k] <- as.vector(demean)
332 |   }
333 | 
334 |   Data.dm$unit <- Data.final$u.index
335 |   Data.dm$time <- Data.final$t.index
336 |   Data.wdm$unit <- Data.final$u.index
337 |   Data.wdm$time <- Data.final$t.index
338 | 
339 |   ## Print de(weighted)-meaned data
340 | 
341 |   ## cat("#####", "\n","De-meaned Data:","\n")
342 |   ## print(Data.dm)
343 |   ## cat("#####", "\n","sqrt(W) x (weighted demeaned):","\n")
344 |   ## print(Data.wdm)
345 | 
346 |   ## final regression on weighted demeaned data
347 |   fit.final <- lm(dep.var ~ -1 + treat, data = Data.wdm)
348 |   fit.ols <- lm(dep.var ~ -1 + treat, data = Data.dm)
349 | 
350 |   ## print(summary(fit.final))
351 | 
352 |   ## set up data frame, with support for standard and modified responses
353 |   
354 |   mf.final <- model.frame(formula="dep.var ~ -1 + treat", data=Data.wdm)
355 |   X <- model.matrix(attr(mf.final, "terms"), data=mf.final)
356 |   Y <- model.response(mf.final, "numeric")
357 |   p <- ncol(X)
358 | 
359 |   coef.wls <- fit.final$coef
360 |   coef.ols <- fit.ols$coef
361 |   d.f <- fit.final$df - unit.number
362 | 
363 |   sigma2 <- sum(resid(fit.final)^2)/d.f
364 |   vcov.wls <- vcov(fit.final)*((fit.final$df)/(fit.final$df - unit.number))
365 |   var.cov <- vcov.wls
366 |   vcov.ols <- vcov(fit.ols)
367 | 
368 |   ## e <- environment()
369 |   ## save(file = "temp.RData", list = ls(), env = e)
370 | 
371 |   
372 |   ## residual <-  resid(fit.final)*1/sqrt(Data.final$W.it)
373 |   residual <- c(data[,outcome] -  data[,treat]%*%t(coef.wls))
374 |   resid.ols <- resid(fit.ols)
375 | 
376 |   ## save residuals
377 |   Data.wdm$u.tilde <-  sqrt(Data.final$W.it)*resid(fit.final)
378 |   Data.dm$u.hat <- u.hat <- resid(fit.ols)
379 | 
380 | 
381 | 
382 | ### Robust Standard Errors
383 | 
384 |   ## contructing de(weighted)-meaned X matrix
385 |   X.tilde <- as.matrix(Data.wdm[,2]) # NT x 1 (where p is number of covariates)
386 |   X.hat <- as.matrix(Data.dm[,2]) # NT x 1 (where p is number of covariates)
387 | 
388 |   ## residuals
389 |   u.tilde <- as.matrix(Data.wdm$u.tilde) # NT x 1
390 |   u.hat <- as.matrix(Data.dm$u.hat)  # NT x 1
391 | 
392 |   ## unit vector (the length should be same as nrow(X.tilde) and length(u.tilde)
393 |   wdm.unit <- as.vector(Data.wdm$unit)
394 |   uniq.u.pw <- length(unique(wdm.unit)) # number of uniq units with positive weights
395 | 
396 | 
397 |   ## cat("3: Robust error calculation started\n")
398 |   ## print(Sys.time())
399 | 
400 |   ginv.XX.tilde <- ginv(crossprod(X.tilde, X.tilde))
401 |   ginv.XX.hat <- ginv(crossprod(X.hat, X.hat))
402 | 
403 |   diag.ee.tilde <- c(u.tilde^2)
404 |   diag.ee.hat <- c(u.hat^2)
405 | 
406 |   
407 |   if ((hetero.se == TRUE) & (auto.se == TRUE)) {# Default is Arellano
408 |     std.error <- "Heteroscedastic / Autocorrelation Robust Standard Error"
409 | 
410 |     ## 1. arbitrary autocorrelation as well as heteroskedasticity (Eq 12)
411 | 
412 |     Omega.hat.HAC <- OmegaHatHAC(nrow(X.tilde), p, wdm.unit, J.u, X.tilde, u.tilde)
413 |     Omega.hat.HAC <- matrix(Omega.hat.HAC, nrow = p, ncol = p)
414 |     Omega.hat.HAC <- (1/(nrow(X.tilde)))* Omega.hat.HAC # without degree of freedom adjustment 
415 |     ## Omega.hat.HAC <- (1/(nrow(X.tilde) - J.u - p))* Omega.hat.HAC
416 |     
417 |     Omega.hat.fe.HAC <- OmegaHatHAC(nrow(X.hat), p, wdm.unit, J.u, X.hat, u.hat)
418 |     Omega.hat.fe.HAC <- matrix(Omega.hat.fe.HAC, nrow = p, ncol = p)
419 |     Omega.hat.fe.HAC <- (1/(nrow(X.hat))) * Omega.hat.fe.HAC # without degree of freedom adjustment
420 |     ## Omega.hat.fe.HAC <- (1/(nrow(X.hat) - J.u - p)) * Omega.hat.fe.HAC
421 | 
422 |     Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Omega.hat.HAC %*% (nrow(X.tilde) * ginv.XX.tilde)
423 |     Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Omega.hat.fe.HAC %*% (nrow(X.hat) * ginv.XX.hat)
424 | 
425 |   } else if ( (hetero.se == TRUE) & (auto.se == FALSE) ) {# independence across observations but heteroskedasticity
426 |     std.error <- "Heteroscedastic Robust Standard Error"
427 | 
428 |     ## 2. independence across observations but heteroskedasticity (Eq 11)
429 |     
430 |     ## Omega.hat.HC <- OmegaHatHC(nrow(X.tilde), p, wdm.unit, J.u, X.tilde, u.tilde)
431 |     ## Omega.hat.HC <- matrix(Omega.hat.HC, nrow = p, ncol = p)
432 |     ## ## same as the following matrix multiplication but slower
433 |     ## ## Omega.hat.he <- t(X.tilde) %*% diag(c(u.tilde)^2, nrow=nrow(X.tilde)) %*% X.tilde
434 |     ## Omega.hat.HC <- (1/(nrow(X.tilde) - J.u - p)) * Omega.hat.HC
435 | 
436 | 
437 |     Omega.hat.HC <- (1/(nrow(X.tilde) - J.u - p))*(crossprod((X.tilde*diag.ee.tilde), X.tilde)) 
438 |     
439 |     Omega.hat.fe.HC <- (1/(nrow(X.hat) - J.u - p))*(crossprod((X.hat*diag.ee.hat), X.hat))  
440 |     
441 |     
442 | ### Stock-Watson (Econometrica 2008: Eq(6)): Bias-asjusted for balance panel
443 |     if (unbiased.se == TRUE) {
444 |       ## check if panel is balanced
445 |       if (sum(as.numeric(apply(matrix(table(wdm.unit)), 1, mean) != mean(matrix(table(wdm.unit)))))  == 0 ){
446 |         std.error <- "Heteroskedastic Standard Error (Stock-Watson Biased Corrected)"
447 |         
448 |         B.hat <- matrix(0, nrow=dim(X.tilde)[2], ncol=dim(X.tilde)[2])
449 |         B2.hat <- matrix(0, nrow=dim(X.hat)[2], ncol=dim(X.hat)[2])
450 |         for (i in 1:J.u) {
451 |           ## cat("unit", i, "\n")
452 |           X.i <- X.tilde[Data.wdm$unit == i,]
453 |           X2.i <- X.hat[Data.wdm$unit == i,]
454 |           ## print(sum(as.numeric(Data.wdm$unit == i)))
455 |           ## print(X.i)
456 |           if (sum(as.numeric(Data.wdm$unit == i)) > 1) { 
457 |             u.i <- u.tilde[Data.wdm$unit == i]
458 |             u2.i <- u.hat[Data.wdm$unit == i]
459 |             ## print(length(u.i))
460 |             flush.console()
461 |             XX.i <- crossprod(X.i, X.i)
462 |             XX2.i <- crossprod(X2.i, X2.i)
463 |             B.hat <- B.hat + ((1/J.t)* XX.i*(1/(J.t-1))*sum(u.i^2))
464 |             B2.hat <- B2.hat + ((1/J.t)* XX2.i*(1/(J.t-1))*sum(u2.i^2))
465 |           }
466 |         }
467 | 
468 |         B.hat <- B.hat * (1/J.u)
469 |         B2.hat <- B2.hat * (1/J.u)
470 | 
471 |         cat("time", J.t, "\n")
472 |         Sigma_HRFE <- ((J.t-1)/(J.t-2))*(Omega.hat.HC - (1/(J.t-1))*B.hat)
473 |         Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Sigma_HRFE %*% (nrow(X.tilde) * ginv.XX.tilde)
474 | 
475 |         Sigma2_HRFE <- ((J.t-1)/(J.t-2))*(Omega.hat.fe.HC - (1/(J.t-1))*B2.hat)
476 |         Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Sigma2_HRFE %*% (nrow(X.hat) * ginv.XX.hat)
477 |       } else { 
478 |         stop ("unbiased.se == TRUE is allowed only when panel is balanced")
479 |       }
480 |     }
481 |     
482 |     Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Omega.hat.HC %*% (nrow(X.tilde) * ginv.XX.tilde)
483 |     
484 |     Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Omega.hat.fe.HC %*% (nrow(X.hat) * ginv.XX.hat)
485 | 
486 | 
487 |   } else if ( (hetero.se == FALSE) & (auto.se == FALSE) ) {# indepdence and homoskedasticity
488 |     std.error <- "Homoskedastic Standard Error"
489 |     
490 |     Psi.hat.wfe <- nrow(X.tilde) * (sigma2 * ginv.XX.tilde)
491 |     
492 |     ## same as the following
493 | 
494 |     ## Psi.hat.wfe2 <- J.u * sigma2 * solve(XX.tilde)
495 |     ## cat("compare homoskedasticity s.e\n")
496 |     ## print(sqrt(diag(Psi.hat.wfe)))
497 |     ## print(sqrt(diag(Psi.hat.wfe2)))
498 |     
499 |     Psi.hat.fe <- nrow(X.hat) * vcov(fit.ols)
500 | 
501 |   } else if ( (hetero.se == FALSE) & (auto.se == TRUE) ) {# Kiefer
502 | 
503 |     stop ("robust standard errors with autocorrelation and homoskedasiticy is not supported")
504 | 
505 |   }
506 | 
507 |   var.cov <- Psi.hat.wfe * (1/nrow(X.tilde))
508 |   var.cov.fe <- Psi.hat.fe *(1/nrow(X.hat))
509 | 
510 | 
511 | 
512 | ### White (1980) Test: Theorem 4
513 | 
514 |       if (White == TRUE){
515 |         
516 |         diag.ee <- c(u.hat) * c(u.tilde)
517 |         
518 |         Lambda.hat1 <-  1/((nrow(X.hat)))* (crossprod((X.hat*diag.ee), X.tilde))  
519 |         Lambda.hat2 <-  1/((nrow(X.tilde)))* (crossprod((X.tilde*diag.ee), X.hat))  
520 | 
521 |         Phi.hat <- Psi.hat.wfe + Psi.hat.fe - (nrow(X.hat)*ginv.XX.hat) %*% Lambda.hat1 %*% (nrow(X.tilde)*ginv.XX.tilde) - (nrow(X.tilde)*ginv.XX.tilde) %*% Lambda.hat2 %*% (nrow(X.hat)*ginv.XX.hat)
522 | 
523 |         ## White test: null hypothesis is ``no misspecification''
524 | 
525 |         white.stat <- as.double(Re(nrow(X.hat) * t(coef.ols - coef.wls) %*% ginv(Phi.hat) %*% (coef.ols - coef.wls)))
526 |         test.null <- pchisq(as.numeric(white.stat), df=p, lower.tail=F) < White.alpha
527 |         white.p <- pchisq(as.numeric(white.stat), df=p, lower.tail=F)
528 |         flush.console()
529 | 
530 |         ## if (verbose) {
531 |         ##   cat("\nWhite calculation done")
532 |         ##   flush.console()
533 |         ## }
534 | 
535 |       } else {
536 |         white.stat <- "NULL"
537 |         test.null <- "NULL"
538 |         white.p <- "NULL"
539 |       }
540 | 
541 |   mf <- as.data.frame(cbind(data[,outcome], data[,treat]))
542 |   colnames(mf) <- c(outcome, treat)
543 |   y <- as.data.frame(data[,outcome])
544 |   colnames(y) <- outcome
545 |   x <- as.data.frame(data[,treat])
546 |   colnames(x) <- treat
547 |   
548 | 
549 | ### Saving results
550 |   z <- list(coefficients = coef.wls,
551 |             x = X,
552 |             y = y,
553 |             mf = mf,
554 |             call = pwfe.call,
555 |             vcov = var.cov,
556 |             se = sqrt(diag(var.cov)),
557 |             sigma = sqrt(sigma2),
558 |             df = d.f,
559 |             residuals = residual,
560 |             W = W,
561 |             unit.name = name.unit,
562 |             unit.index = number.unit,
563 |             time.name = name.time,
564 |             time.index = number.time,
565 |             method = method,
566 |             causal = causal,
567 |             est = est,
568 |             std.error = std.error,
569 |             White.pvalue = white.p,
570 |             White.alpha = White.alpha,
571 |             White.stat = white.stat,
572 |             White.test = test.null)
573 |   class(z) <- "pwfe"
574 |   z
575 | 
576 | 
577 | 
578 | }
579 | 
580 | 
581 | print.pwfe <- function(x,...){
582 |   cat("Call:\n")
583 |   print(x$call)
584 |   cat("\nCoefficients:\n")
585 |   print(x$coefficients)
586 |   cat("\nStd.Err:\n")
587 |   print(x$se)
588 | }
589 | 
590 | 
591 | 
592 | summary.pwfe <- function(object, signif.stars = getOption("show.signif.stars"),...){
593 |   se <- object$se
594 |   sigma <- object$sigma
595 |   df <- object$df
596 |   tval <- coef(object) / se
597 |   TAB <- cbind(Estimate = coef(object),
598 |                Std.Err = se,
599 |                t.value = tval,
600 |                p.value = 2*pt(-abs(tval), df = object$df))
601 |   res <- list(call = object$call,
602 |               coefficients = TAB,
603 |               sigma = object$sigma,
604 |               df = object$df,
605 |               W = object$weights,
606 |               residuals = object$residuals,
607 |               method = object$method,
608 |               causal = object$causal,
609 |               estimator = object$est,
610 |               std.error = object$std.error,
611 |               White.pvalue = object$White.pvalue,
612 |               White.alpha = object$White.alpha,
613 |               White.stat = object$White.stat,
614 |               White.test = object$White.test)
615 |   class(res) <- "summary.pwfe"
616 |   res
617 | }
618 | 
619 | 
620 | print.summary.pwfe <- function(x, ...){
621 |   cat("\nMethod:", x$method, "Fixed Effects (Propensity Score)\n")
622 |   cat("\nQuantity of Interest:", x$causal)
623 |   cat("\nEstimator:", x$estimator)
624 |   cat("\nStandard Error:", x$std.error)
625 |   cat("\n")
626 |   cat("\n")
627 |   cat("Call:\n")
628 |   print(x$call)
629 |   cat("\n")
630 |   printCoefmat(x$coefficients, P.values=TRUE, has.Pvalue=TRUE)
631 |   cat("\nResidual standard error:", format(signif(x$sigma,
632 |                                                   4)), "on", x$df, "degrees of freedom")
633 |   cat("\nWhite statistics for functional misspecification:", x$White.stat, "with Pvalue=", x$White.pvalue)
634 |   cat("\nReject the null of NO misspecification:", x$White.test)
635 |   cat("\n")
636 | }
637 | 


--------------------------------------------------------------------------------
/R/wfe.R:
--------------------------------------------------------------------------------
   1 | wfe <- function (formula, data, treat = "treat.name",
   2 |                  unit.index, time.index = NULL, method = "unit",
   3 |                  dyad1.index = NULL, dyad2.index = NULL,
   4 |                  qoi = "ate", estimator = NULL, C.it = NULL,
   5 |                  hetero.se = TRUE, auto.se = TRUE,
   6 |                  dyad.se = FALSE,
   7 |                  White = TRUE, White.alpha = 0.05,
   8 |                  verbose = TRUE, unbiased.se = FALSE, unweighted = FALSE,
   9 |                  store.wdm = FALSE, maxdev.did= NULL,
  10 |                  tol = sqrt(.Machine$double.eps)){
  11 | 
  12 | 
  13 |     wfe.call <- match.call()
  14 |     ## set up data frame, with support for standard and modified responses
  15 |     mf <- model.frame(formula=formula, data=data)
  16 |     x <- model.matrix(attr(mf, "terms"), data=mf)
  17 |     y <- model.response(mf, "numeric")
  18 |     tn.row <- nrow(mf) # total number of rows in data
  19 | 
  20 |     class(data) <- "data.frame"
  21 | 
  22 |     ## ## remove missing variables: removing rows with missing values in either y or treat
  23 |     remove.indices <- which(!rownames(data) %in% rownames(mf))
  24 |     
  25 |     
  26 |     if (length(remove.indices) > 0){
  27 |         data <- data[-remove.indices,]
  28 |         if (verbose)
  29 |             cat(" \nMissing values are removed\n")
  30 |     }
  31 | 
  32 |     data$y <- y
  33 |     
  34 |     ## Creating dummies variables for White test in the end
  35 |     X <- as.data.frame(x[,-1])
  36 |     p <- ncol(X)
  37 | 
  38 |     ## e <- environment()
  39 |     ## save(file = "temp.RData", list = ls(), env = e)
  40 |     
  41 | ### --------------------------------------------------------
  42 | ### Warnings
  43 | ### --------------------------------------------------------
  44 | 
  45 |     ## Warning for missing unit & time index
  46 |     if (missing(unit.index))
  47 |         stop("'unit.index' or index for strata should be provided")
  48 | 
  49 |     if (is.null(time.index) & method == "time")
  50 |         stop("'time.index' should be provided")
  51 | 
  52 |     ## Warning for methods
  53 |     if(method=="time" && !is.null(estimator) && estimator == "fd")
  54 |         stop("First Difference is not compatible with 'time' method: set method == 'unit'")
  55 | 
  56 |     if(method=="time" && !is.null(estimator) && estimator == "did")
  57 |         stop("Difference-in-Differences is not compatible with 'time' method: set method == 'unit'")
  58 | 
  59 |     if(method=="time" && !is.null(estimator) && estimator == "Mdid")
  60 |         stop("Match-Difference-in-Differences is not compatible with 'time' method: set method == 'unit'")
  61 | 
  62 |     if(is.null(time.index) && !is.null(estimator) && estimator == "fd")
  63 |         stop("First Difference cannot calculate when 'time.index' is missing")
  64 | 
  65 |     ## Warning for C.it
  66 |     if (!is.null(C.it)){
  67 |         Cit <- data[,C.it]
  68 |         if (!is.numeric(Cit) && length(Cit)!= tn.row)
  69 |             stop("'C.it' must be a numeric vector with length equal to number of observations")
  70 |         if ( sum(Cit < 0) > 0 )
  71 |             stop("'C.it' must be a non-negative numeric vector")
  72 |     }
  73 | 
  74 |     ## cat("warnings done:\n")
  75 |     
  76 |     ## C.it
  77 |     ## Default for ATE
  78 |     if (is.null(C.it)){
  79 |         data$C.it <- as.integer(rep(1, nrow(data)))
  80 |     }
  81 | 
  82 |     ## White.alpha
  83 |     if (is.null(White.alpha)){
  84 |         White.alpha <- 0.05
  85 |     } else {
  86 |         White.alpha <- White.alpha
  87 |     }
  88 | 
  89 |     ## Warning for binary treatment
  90 |     ## treat should be 0,1 where 1 indicates treatment
  91 | 
  92 |     ## Warning for maxdev.did
  93 |     if(!is.null(maxdev.did) && maxdev.did < 0){
  94 |         stop("Warning: maxdev.did should be a positive numeric value")
  95 |     }
  96 | 
  97 | 
  98 |     ## --------------------------------------------------------
  99 |     ## Unit and Time index
 100 |     ## -------------------------------------------------------- 
 101 |     ## Creating time index for strata fixed effect analysis
 102 | 
 103 |     ## storing original unit and time index
 104 |     orig.unit.idx <- as.character(data[,unit.index])
 105 | 
 106 |     ## unit index
 107 |     numeric.u.index <- as.numeric(as.factor(data[,unit.index]))
 108 |     numeric.u.index[is.na(numeric.u.index)] <- 0
 109 |     ## handling missing unit index
 110 |     uniq.u <- unique(na.omit(numeric.u.index))
 111 |     uniq.u <- sort(uniq.u[!(uniq.u %in% 0)])
 112 |     J.u <- length(uniq.u)
 113 |     data$u.index <- Index(numeric.u.index, uniq.u, J.u, tn.row)
 114 | 
 115 |     ## for dyadic data: unit 1 and unit 2 that compose each dyad
 116 |     if(dyad.se == TRUE){
 117 |         ## Warning 
 118 |         if(is.null(dyad1.index) | is.null(dyad2.index)){
 119 |             stop("Warning: For dyadic data, two separate unit indices for the members of each dyad -- dyad1.index, dyad2.indx -- should be provided")
 120 |         }
 121 | 
 122 |         data$dyad <- data[, unit.index]
 123 |         data$c1 <- data[, dyad1.index]
 124 |         data$c2 <- data[, dyad2.index]
 125 | 
 126 |     }
 127 | 
 128 |     ## time index
 129 |     if (is.null(time.index)) {
 130 |         data$t.index  <- GenTime(data$u.index, tn.row, length(uniq.u))
 131 |         numeric.t.index <- as.numeric(as.factor(data$t.index))
 132 | 
 133 |         ## storing original unit and time index
 134 |         orig.time.idx <- as.character(data$t.index)
 135 |     } else {
 136 |         ## storing original unit and time index
 137 |         orig.time.idx <- as.character(data[,time.index])
 138 |         
 139 |         numeric.t.index <- as.numeric(as.factor(data[,time.index]))
 140 |         numeric.t.index[is.na(numeric.t.index)] <- 0
 141 |         ## handling missing time index
 142 |         uniq.t <- unique(na.omit(numeric.t.index))
 143 |         uniq.t <- sort(uniq.t[!(uniq.t %in% 0)])
 144 |         ## needs to sort for unbalnced panel, See Index()
 145 |         J.t <- length(uniq.t)
 146 |         data$t.index <- Index(numeric.t.index, uniq.t, J.t, tn.row)
 147 |     }
 148 |     uniq.t <- unique(data$t.index)
 149 | 
 150 |     
 151 |     ## unique unit number and sorting data
 152 |     if (method == "unit"){
 153 |         unit.number <- length(uniq.u)
 154 |     } else if (method == "time"){
 155 |         unit.number <- length(uniq.t)
 156 |     } else {
 157 |         stop("method should be either unit or time")
 158 |     }
 159 | 
 160 |     if (verbose)
 161 |         cat(" \nNumber of unique", method, "is", unit.number, "\n")
 162 | 
 163 |     ## e <- environment()
 164 |     ## save(file = "temp.RData", list = ls(), env = e)
 165 | ### -------------------------------------------------------- 
 166 | 
 167 |     ## order data by unit index
 168 | 
 169 |     tmp <- cbind(X, data$u.index, data$t.index) 
 170 |     tmp <- tmp[order(tmp[,(p+1)], tmp[,(p+2)]),]
 171 |     X <- as.data.frame(tmp[,-((p+1):(p+2))])
 172 |     colnames(X) <- colnames(x)[-1]
 173 | 
 174 |     data <- data[order(data$u.index, data$t.index),]
 175 |     y <- data$y[order(data$u.index, data$t.index)]
 176 | 
 177 |     ## e <- environment()
 178 |     ## save(file = "temp.RData", list = ls(), env = e)
 179 | 
 180 |     ## saving unit index for each unit
 181 |     name.unit <- unique(data[,unit.index])
 182 |     number.unit <- unique(numeric.u.index)
 183 |     units <- as.data.frame(cbind(number.unit,as.character(name.unit)))
 184 |     colnames(units) <- c("unit.index", "unit")
 185 |     rownames(units) <- seq(1:nrow(units))
 186 |     
 187 |     ## saving time index for each time
 188 | 
 189 |     if (is.null(time.index)) {
 190 |         name.time  <- unique(data$t.index)
 191 |     } else {
 192 |         name.time <- unique(data[,time.index])
 193 |     }
 194 |     number.time <- unique(numeric.t.index)
 195 |     times <- cbind(number.time, name.time)
 196 |     times <- as.data.frame(times[order(times[,1]),])
 197 |     colnames(times) <- c("time.index", "time")
 198 | 
 199 |     
 200 |     ## new model frame order by u.index
 201 |     mf.col <- colnames(mf)
 202 |     mf.sorted <- cbind(y,X)
 203 |     colnames(mf.sorted) <- mf.col
 204 |     
 205 |     ## treatment variable
 206 |     data$TR <- as.numeric(data[,treat])
 207 | 
 208 |     if (length(unique(data$TR)) !=2)
 209 |         stop("'treat' must be a binary vector: there are more than two values of treatment")
 210 | 
 211 |     if (sum(unique(data$TR)) !=1)
 212 |         stop("'treat' must be a either zero or one where one indicates treatment")
 213 | 
 214 | ### --------------------------------------------------------
 215 | ### Quantity of interest: qoi
 216 | ### -------------------------------------------------------- 
 217 | 
 218 |     if (missing(qoi)){
 219 |         qoi <- "ate"
 220 |         causal <- "ate"
 221 |         ate.n <- 1
 222 |     }
 223 | 
 224 |     ate.n <-  att.n <- 0
 225 |     if (qoi == "ate"){
 226 |         causal <- "ATE (Average Treatment Effect)"
 227 |         ate.n <- 1
 228 |     }
 229 |     if (qoi == "att"){
 230 |         causal <- "ATT (Average Treatment Effect for the Treated)"
 231 |         att.n <- 1
 232 |     }
 233 | 
 234 |     if (is.null(estimator)) {
 235 |         est <- "NULL"
 236 |     }
 237 |     if (!is.null(estimator) && estimator == "fd"){
 238 |         est <- "FD (First-Difference)"
 239 |     }
 240 |     if (!is.null(estimator) && estimator == "did"){
 241 |         est <- "DID (Difference-in-Differences)"
 242 |     }
 243 |     if (!is.null(estimator) && estimator == "Mdid"){
 244 |         est <- "DID (Difference-in-Differences) with Matching on Pre-treatment Outcome"
 245 |     }
 246 | 
 247 |     if (unweighted == TRUE){
 248 |         causal <- "Unweighted (Standard) Fixed Effect"
 249 |     }
 250 | 
 251 |     
 252 | ### Weights calculation
 253 | 
 254 |     ## One-way Weighted FE
 255 |     if (is.null(estimator) || estimator=="fd") {
 256 | 
 257 |         if (verbose) {
 258 |             cat("\nWeight calculation started ")
 259 |             flush.console()
 260 |         }
 261 |         
 262 |         ## Standard Fixed effect
 263 |         if (unweighted == TRUE) {
 264 |             data$W.it <- rep(1, nrow(data))
 265 |             W <- matrix(1,nrow=length(uniq.t), ncol=length(uniq.u))
 266 |         } else {
 267 |             
 268 |             ## Unit fixed effects models
 269 |             if ( (method=="unit" & qoi=="ate" & is.null(estimator) ) | (method=="unit" & qoi=="att" & is.null(estimator)) ) {
 270 |                 W <- GenWeightsUnit(data$u.index, data$t.index, data$TR, data$C.it, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, length(uniq.u)*length(uniq.t), verbose)
 271 |                 W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T)
 272 |                 data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row)
 273 |             }
 274 |             
 275 |             ## Time fixed effects models
 276 |             if ( (method=="time" & qoi=="ate" & is.null(estimator) ) | (method=="time" & qoi=="att" & is.null(estimator)) ) {
 277 |                 W <- GenWeightsTime(data$t.index, data$u.index, data$TR, data$C.it, tn.row, length(uniq.t), length(uniq.u), ate.n, att.n, length(uniq.t)*length(uniq.u), verbose)
 278 |                 W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T)
 279 |                 data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row)
 280 |             }
 281 |             
 282 |             
 283 |             ## Within Unit First Difference
 284 |             if(( (method=="unit") && (qoi == "ate") && (!is.null(estimator) && estimator == "fd")) | ((method == "unit") && (qoi =="att") && (!is.null(estimator) && estimator == "fd"))) {
 285 |                 W <- GenWeightsFD(data$u.index, data$t.index, data$TR, data$C.it, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose)
 286 |                 W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T)
 287 |                 data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row)
 288 |             }
 289 |         }
 290 |         
 291 |         
 292 |         if (verbose) {
 293 |             cat("Weight calculation done \n")
 294 |             flush.console()
 295 |         }
 296 |         
 297 |         if(( (method=="unit") && (qoi == "ate") && (!is.null(estimator) && estimator == "fd")) | ((method == "unit") && (qoi =="att") && (!is.null(estimator) && estimator == "fd"))) {
 298 |             nz.obs <- sum(as.numeric(data$W.it !=0))
 299 |             if (verbose)
 300 |                 cat("\nTotal number of observations with non-zero weight:", nz.obs,"\n")
 301 |             flush.console()
 302 |         }
 303 |         
 304 |         
 305 | 
 306 | 
 307 | ### Demean based on the weights
 308 |         
 309 |         wdm.Data <- mf.sorted
 310 |         
 311 |         
 312 |         ## Add Weight and unit index
 313 |         wdm.Data$W <- data$W.it
 314 |         wdm.Data$unit <- data$u.index
 315 |         wdm.Data$time <- data$t.index
 316 |         wdm.Data <- as.data.frame(wdm.Data)
 317 | 
 318 |         ## data for traditional fixed effect
 319 |         dm.Data <- wdm.Data
 320 | 
 321 |         ## excluding zero weights data for weighted fixed effect
 322 |         
 323 |         nc <- ncol(wdm.Data)
 324 | 
 325 |         ## unique number of units and time with positive weights
 326 |         if (method == "unit"){
 327 |             unit.number.pw <- length(unique(wdm.Data$unit))
 328 |         } else if (method == "time"){
 329 |             unit.number.pw <- length(unique(wdm.Data$time))
 330 |         } else {
 331 |             stop("method should be either unit or time")
 332 |         }
 333 | 
 334 | 
 335 |         ## ## Creating the matrix for final analysis
 336 |         ## Data.dm <- as.data.frame(matrix(NA, ncol = nc-3, nrow = nrow(wdm.Data)))
 337 |         ## Data.wdm <- as.data.frame(matrix(NA, ncol = nc-3, nrow = nrow(wdm.Data)))
 338 | 
 339 |         ## Creating the matrix for final analysis
 340 |         Data.dm <- dm.Data
 341 |         Data.wdm <- wdm.Data
 342 | 
 343 | 
 344 |         ## colume names
 345 |         colnames(Data.dm) <- colnames(wdm.Data)[1:nc]
 346 |         colnames(Data.wdm) <- colnames(wdm.Data)[1:nc]
 347 | 
 348 |         
 349 |         for (k in 1:(nc-3)) {
 350 |             ## in C
 351 |             if (method == "unit"){
 352 |                 w.wdemean <- WWDemean(wdm.Data[,k], wdm.Data$W, wdm.Data$unit, unit.number.pw, nrow(wdm.Data))
 353 |                 demean <- Demean(dm.Data[,k], dm.Data$unit, unit.number, nrow(dm.Data))
 354 |                 Data.wdm[,k] <- as.vector(w.wdemean)
 355 |                 Data.dm[,k] <- as.vector(demean)
 356 |             }
 357 |             if (method == "time"){
 358 |                 w.wdemean <- WWDemean(wdm.Data[,k], wdm.Data$W, wdm.Data$time, unit.number.pw, nrow(wdm.Data))
 359 |                 demean <- Demean(dm.Data[,k], dm.Data$time, unit.number, nrow(dm.Data))
 360 |                 Data.wdm[,k] <- as.vector(w.wdemean)
 361 |                 Data.dm[,k] <- as.vector(demean)
 362 |             }
 363 | 
 364 |         }
 365 | 
 366 |         ## save weighted demeaned dataframe
 367 |         if (store.wdm == TRUE){
 368 |             Y.wdm <- Data.wdm[,1]
 369 |             X.wdm <- Data.wdm[,(2:(nc-3))]
 370 |         } else {
 371 |             Y.wdm <- NULL
 372 |             X.wdm <- NULL
 373 |         }
 374 |         
 375 | 
 376 |         
 377 |         ## change formula without intercept
 378 |         a <- unlist(strsplit(as.character(formula), "~"))
 379 |         formula.ni <- as.formula(paste(a[2], "~ -1 + ",  a[3]))
 380 |         ## print(formula.ni)
 381 |         
 382 |         ## final regression on weighted demeaned data
 383 |         fit.final <- lm(formula.ni, data = Data.wdm)
 384 |         fit.ols <- lm(formula.ni, data = Data.dm)
 385 | 
 386 |         ## brute force matrix calculation
 387 |         ## V <- as.matrix(Data.wdm[,2:(nc-3)])
 388 |         ## coef2 <- ginv(crossprod(V, V))%*% crossprod(V, Data.wdm[,1])
 389 |         
 390 |         
 391 |         ## residuals
 392 |         
 393 |         ## u.tilde <- sqrt(wdm.Data$W)*resid(fit.final) # NT x 1
 394 |         u.tilde <- resid(fit.final) # NT x 1
 395 |         
 396 |         ## alternatively
 397 |         ## u.tilde <- sqrt(wdm.Data$W) * (Data.wdm[,1] - as.matrix(Data.wdm[,2:(nc-3)])%*%c(fit.final$coef))
 398 |         u.hat <- as.matrix(resid(fit.ols))  # NT x 1
 399 | 
 400 |         ## saving results
 401 |         coef.wls <- fit.final$coef
 402 |         coef.ols <- fit.ols$coef
 403 |         d.f <- fit.final$df - J.u
 404 |         ## sigma2 <- (sum((sqrt(wdm.Data$W)*resid(fit.final))^2))/d.f
 405 |         sigma2 <- (sum((resid(fit.final))^2))/d.f
 406 | 
 407 |         ## alternatively
 408 |         ## sigma2.a <- sum(u.tilde^2)/d.f
 409 |         ## cat("compare:", sigma2, sigma2.a, "\n") # same...
 410 |         
 411 |         
 412 | ### Robust Standard Errors
 413 | 
 414 |         ## contructing de(weighted)-meaned X matrix
 415 | 
 416 |         ## in case only one covariate
 417 |         if (p == 1){ # one covariate case
 418 |             X.tilde <- as.matrix(Data.wdm[,2])
 419 |             X.hat <- as.matrix(Data.dm[,2])
 420 |         } else {
 421 |             X.tilde <- as.matrix(Data.wdm[,2:(1+p)])
 422 |             X.hat <- as.matrix(Data.dm[,2:(1+p)])
 423 |         }
 424 | 
 425 | 
 426 |         ## unit vector (the length should be same as nrow(X.tilde) and length(u.tilde)
 427 |         wdm.unit <- as.vector(Data.wdm$unit)
 428 |         
 429 | ### (Robust) standard errors
 430 | 
 431 |         ## cat("3: Robust error calculation started\n")
 432 |         
 433 | 
 434 |         ginv.XX.tilde <- ginv(crossprod(X.tilde, X.tilde))
 435 |         ginv.XX.hat <- ginv(crossprod(X.hat, X.hat))
 436 | 
 437 |         diag.ee.tilde <- c(u.tilde^2)
 438 |         diag.ee.hat <- c(u.hat^2)
 439 | 
 440 |         ## e <- environment()
 441 |         ## save(file = "dyadSE.RData", list = ls(), env = e)
 442 | 
 443 | 
 444 |         if(dyad.se == TRUE){
 445 | 
 446 |             std.error <- "Robust Standard Error for Dyadic Data"
 447 |             
 448 |             OmegaDyad <- function(X.tilde, e.tilde, dyadID, c1, c2) {
 449 | 
 450 |                 uniq.dyadID <- unique(dyadID)
 451 |                 
 452 |                 for(d in 1:length(uniq.dyadID)){
 453 |                     ## print(d)
 454 |                     dyad.d <- uniq.dyadID[d]
 455 |                     cty1 <- c1[which(dyadID == dyad.d)][1]
 456 |                     cty2 <- c2[which(dyadID == dyad.d)][1]
 457 |                     
 458 |                     idx.d <- which(dyadID == dyad.d)
 459 |                     x.d <- as.matrix(X.tilde[idx.d,])
 460 |                     e.d <- matrix(e.tilde[idx.d], ncol=1)
 461 |                     
 462 |                     ## consider all the other dyads in which country 1 or country
 463 |                     ## 2 is the member
 464 |                     idx.dprime <- which( (c1==cty1 | c2 == cty2) & dyadID!=dyad.d)
 465 |                     uniq.dprime <- unique(dyadID[idx.dprime])
 466 | 
 467 |                     ## in case only one year is observed
 468 |                     if(is.null(nrow(x.d))){
 469 |                         x.d <- t(matrix(x.d, ncol=1))
 470 |                         e.d <- as.matrix(as.numeric(e.d))
 471 |                         Omega.hat <- t(x.d) %*% e.d %*% t(e.d) %*% x.d
 472 |                     } else {
 473 |                         Omega.hat <- t(x.d) %*% e.d %*% t(e.d) %*% x.d
 474 |                     }
 475 | 
 476 |                     ## loop over all dyads that have one of the two members
 477 |                     for(p in 1:length(uniq.dprime)){
 478 |                         ## print(p)
 479 |                         dprime <- uniq.dprime[p]
 480 |                         idx.dprime <- which(dyadID == dprime)
 481 |                         x.dprime <- as.matrix(X.tilde[idx.dprime,])
 482 |                         e.dprime <- matrix(e.tilde[idx.dprime], ncol=1)
 483 | 
 484 |                         if(is.null(nrow((x.dprime)))){
 485 |                             x.dprime <- t(matrix(x.dprime, ncol=1))
 486 |                             e.dprime <- as.matrix(as.numeric(e.dprime))
 487 |                             Odprime <- t(x.d) %*% e.d %*% t(e.dprime) %*% x.dprime
 488 |                         } else {
 489 |                             Odprime <- t(x.d) %*% e.d %*% t(e.dprime) %*% x.dprime
 490 |                         }
 491 |                         
 492 |                         if(p==1){
 493 |                             Oprime <- Odprime
 494 |                         } else {
 495 |                             Oprime <- Oprime + Odprime 
 496 |                         }
 497 |                     }
 498 | 
 499 |                     ## storing results
 500 |                     if(d==1){
 501 |                         Omega.hat.dyad <- Omega.hat + Oprime
 502 |                     } else {
 503 |                         Omega.hat.dyad <- Omega.hat.dyad + Omega.hat + Oprime
 504 |                     }
 505 |                 }
 506 |                 return(Omega.hat.dyad)                
 507 |             }
 508 | 
 509 |             Omega.hat.DYAD <- OmegaDyad(X.tilde, u.tilde, data$dyad, data$c1, data$c2)
 510 |             Omega.hat.fe.DYAD <- OmegaDyad(X.hat, u.hat, data$dyad, data$c1, data$c2)
 511 | 
 512 |             Psi.hat.wfe <- (ginv.XX.tilde) %*% Omega.hat.DYAD %*% (ginv.XX.tilde)
 513 |             Psi.hat.fe <- (ginv.XX.hat) %*% Omega.hat.fe.DYAD %*% (ginv.XX.hat)
 514 | 
 515 |         } else if ((hetero.se == TRUE) & (auto.se == TRUE)) {# Default is Arellano
 516 |             std.error <- "Heteroscedastic / Autocorrelation Robust Standard Error"
 517 | 
 518 |             ## 1. arbitrary autocorrelation as well as heteroskedasticity (Eq 12)
 519 |             
 520 |             Omega.hat.HAC <- OmegaHatHAC(nrow(X.tilde), p, wdm.unit, J.u, X.tilde, u.tilde)
 521 |             Omega.hat.HAC <- matrix(Omega.hat.HAC, nrow = p, ncol = p)
 522 |             ## Omega.hat.HAC <- (1/(nrow(X.tilde)))* Omega.hat.HAC # without degree of freedom adjustment 
 523 |             Omega.hat.HAC <- 1/J.u * Omega.hat.HAC
 524 |             
 525 |             Omega.hat.fe.HAC <- OmegaHatHAC(nrow(X.hat), p, wdm.unit, J.u, X.hat, u.hat)
 526 |             Omega.hat.fe.HAC <- matrix(Omega.hat.fe.HAC, nrow = p, ncol = p)
 527 |             ## Omega.hat.fe.HAC <- (1/(nrow(X.hat))) * Omega.hat.fe.HAC # without degree of freedom adjustment
 528 |             Omega.hat.fe.HAC <- 1/J.u * Omega.hat.fe.HAC
 529 | 
 530 |             ## Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Omega.hat.HAC %*% (nrow(X.tilde) * ginv.XX.tilde)
 531 |             ## Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Omega.hat.fe.HAC %*% (nrow(X.hat) * ginv.XX.hat)
 532 | 
 533 |             Psi.hat.wfe <- (J.u*ginv.XX.tilde) %*% Omega.hat.HAC %*% (J.u*ginv.XX.tilde)
 534 |             Psi.hat.fe <- (J.u*ginv.XX.hat) %*% Omega.hat.fe.HAC %*% (J.u*ginv.XX.hat)
 535 | 
 536 |             ## degrees of freedom adjustment
 537 |             Nnonzero <- length(which(data$W.it !=0))
 538 |             K <- nc-3
 539 |             dfHAC <- (J.u/(J.u-1)) * (Nnonzero/(Nnonzero-K+1))
 540 |             
 541 |             Psi.hat.wfe <- dfHAC * Psi.hat.wfe
 542 |             Psi.hat.fe <- dfHAC * Psi.hat.fe
 543 | 
 544 |             
 545 |         } else if ( (hetero.se == TRUE) & (auto.se == FALSE) ) {# independence across observations but heteroskedasticity
 546 |             std.error <- "Heteroscedastic Robust Standard Error"
 547 | 
 548 |             ## 2. independence across observations but heteroskedasticity (Eq 11)
 549 |             
 550 |             Omega.hat.HC <- (1/J.u)*(crossprod((X.tilde*diag.ee.tilde), X.tilde)) 
 551 |             Omega.hat.fe.HC <- (1/J.u)*(crossprod((X.hat*diag.ee.hat), X.hat))  
 552 |             
 553 |             
 554 | ### Stock-Watson (Econometrica 2008: Eq(6)): Bias-asjusted for balance panel
 555 |             if (unbiased.se == TRUE) {
 556 |                 ## check if panel is balanced
 557 |                 if (sum(as.numeric(apply(matrix(table(wdm.unit)), 1, mean) != mean(matrix(table(wdm.unit)))))  == 0 ){
 558 |                     std.error <- "Heteroskedastic Standard Error (Stock-Watson Biased Corrected)"
 559 |                     
 560 |                     B.hat <- matrix(0, nrow=dim(X.tilde)[2], ncol=dim(X.tilde)[2])
 561 |                     B2.hat <- matrix(0, nrow=dim(X.hat)[2], ncol=dim(X.hat)[2])
 562 |                     for (i in 1:J.u) {
 563 |                         ## cat("unit", i, "\n")
 564 |                         X.i <- X.tilde[Data.wdm$unit == i,]
 565 |                         X2.i <- X.hat[Data.wdm$unit == i,]
 566 |                         ## print(sum(as.numeric(Data.wdm$unit == i)))
 567 |                         ## print(X.i)
 568 |                         if (sum(as.numeric(Data.wdm$unit == i)) > 1) { 
 569 |                             u.i <- u.tilde[Data.wdm$unit == i]
 570 |                             u2.i <- u.hat[Data.wdm$unit == i]
 571 |                             ## print(length(u.i))
 572 |                             flush.console()
 573 |                             XX.i <- crossprod(X.i, X.i)
 574 |                             XX2.i <- crossprod(X2.i, X2.i)
 575 |                             B.hat <- B.hat + ((1/J.t)* XX.i*(1/(J.t-1))*sum(u.i^2))
 576 |                             B2.hat <- B2.hat + ((1/J.t)* XX2.i*(1/(J.t-1))*sum(u2.i^2))
 577 |                         }
 578 |                     }
 579 | 
 580 |                     B.hat <- B.hat * (1/J.u)
 581 |                     B2.hat <- B2.hat * (1/J.u)
 582 | 
 583 |                     cat("time", J.t, "\n")
 584 |                     Sigma_HRFE <- ((J.t-1)/(J.t-2))*(Omega.hat.HC - (1/(J.t-1))*B.hat)
 585 |                     ## Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Sigma_HRFE %*% (nrow(X.tilde) * ginv.XX.tilde)
 586 |                     Psi.hat.wfe <- (J.u*ginv.XX.tilde) %*% Sigma_HRFE %*% (J.u*ginv.XX.tilde)
 587 | 
 588 |                     Sigma2_HRFE <- ((J.t-1)/(J.t-2))*(Omega.hat.fe.HC - (1/(J.t-1))*B2.hat)
 589 |                     ## Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Sigma2_HRFE %*% (nrow(X.hat) * ginv.XX.hat)
 590 |                     Psi.hat.fe <- (J.u*ginv.XX.hat) %*% Sigma2_HRFE %*% (J.u*ginv.XX.hat)
 591 |                     
 592 |                     
 593 |                 } else { 
 594 |                     stop ("unbiased.se == TRUE is allowed only when panel is balanced")
 595 |                 }
 596 |             }
 597 |             
 598 |             Psi.hat.wfe <- (J.u*ginv.XX.tilde) %*% Omega.hat.HC %*% (J.u*ginv.XX.tilde)
 599 |             Psi.hat.fe <- (J.u*ginv.XX.hat) %*% Omega.hat.fe.HC %*% (J.u*ginv.XX.hat)
 600 | 
 601 |             ## degrees of freedom adjustment: G / (G -1) * N / (N - K + 1)
 602 |             ## where G is the number of groups (number of fixed effects),
 603 |             ## N is the number of non-zero weights
 604 | 	    Nnonzero <- length(which(data$W.it !=0))
 605 |             K <- nc-3
 606 |             dfHC <- (J.u/(J.u-1)) * (Nnonzero/(Nnonzero-K+1))
 607 |             Psi.hat.wfe <- dfHC * Psi.hat.wfe
 608 |             Psi.hat.fe <- dfHC * Psi.hat.fe
 609 |             
 610 | 
 611 |         } else if ( (hetero.se == FALSE) & (auto.se == FALSE) ) {# indepdence and homoskedasticity
 612 | 
 613 |             stop("standard errors with independence and homoskedasticity is not supported")
 614 | 
 615 |             ## std.error <- "Homoskedastic Standard Error"
 616 |             
 617 |             ## ## Psi.hat.wfe <- nrow(X.tilde) * (sigma2 * ginv.XX.tilde)
 618 |             ## Psi.hat.wfe <- J.u * (sigma2 * ginv.XX.tilde)            
 619 |             
 620 |             ## ## Psi.hat.fe <- nrow(X.hat) * vcov(fit.ols)
 621 |             ## Psi.hat.fe <- J.u * vcov(fit.ols)            
 622 | 
 623 |         } else if ( (hetero.se == FALSE) & (auto.se == TRUE) ) {# Kiefer
 624 | 
 625 |             stop ("robust standard errors with autocorrelation and homoskedasiticy is not supported")
 626 | 
 627 |         }
 628 | 
 629 | 
 630 |         if(dyad.se == TRUE){
 631 |             var.cov <- Psi.hat.wfe 
 632 |             var.cov.fe <- Psi.hat.fe 
 633 |         } else {
 634 |             var.cov <- Psi.hat.wfe * (1/J.u)
 635 |             var.cov.fe <- Psi.hat.fe *(1/J.u)
 636 |         }
 637 | 
 638 | 
 639 | ### White (1980) Test: Theorem 4
 640 | 
 641 |         Nnonzero <- length(which(data$W.it !=0))
 642 | 
 643 |         diag.ee <- c(u.hat) * c(u.tilde)
 644 |         
 645 |         Lambda.hat1 <-  1/((nrow(X.hat) - J.u - p))* (crossprod((X.hat*diag.ee), X.tilde))  
 646 |         Lambda.hat2 <-  1/((nrow(X.tilde) - J.u - p))* (crossprod((X.tilde*diag.ee), X.hat))  
 647 | 
 648 | 
 649 |         Phi.hat <- Psi.hat.wfe + Psi.hat.fe - (Nnonzero*ginv.XX.hat) %*% Lambda.hat1 %*% (Nnonzero*ginv.XX.tilde) - (Nnonzero*ginv.XX.tilde) %*% Lambda.hat2 %*% (Nnonzero*ginv.XX.hat)
 650 | 
 651 |         rm(Lambda.hat1, Lambda.hat2)
 652 |         gc()
 653 | 
 654 |         ## White test: null hypothesis is ``no misspecification''
 655 |         ## white.stat <- nrow(X.hat) * t(coef.ols - coef.wls) %*% ginv(Phi.hat) %*% (coef.ols - coef.wls)
 656 |         white.stat <- Nnonzero * t(coef.ols - coef.wls) %*% ginv(Phi.hat) %*% (coef.ols - coef.wls)
 657 |         
 658 |         test.null <- pchisq(as.numeric(white.stat), df=p, lower.tail=F) < White.alpha
 659 |         white.p <- pchisq(as.numeric(white.stat), df=p, lower.tail=F)
 660 | 
 661 |         
 662 |         flush.console()
 663 |         
 664 |         
 665 |         ## ## compare with sandwich standard error (essentially same)
 666 | 
 667 |         ## ## cat("sandwich package regression se HC:", print(sqrt(diag((vcovHC(fit.traditional, type="HC")[1:p,1:p])))), "\n")
 668 |         ## ## cat("sandwich package regression se HC0:", print(sqrt(diag((vcovHC(fit.traditional, type="HC0")[1:p,1:p])))), "\n")
 669 |         ## ## cat("sandwich package regression se HC1:", print(sqrt(diag((vcovHC(fit.traditional, type="HC1")[1:p,1:p])))), "\n")
 670 |         ## ## cat("sandwich package regression se HC2:", print(sqrt(diag((vcovHC(fit.traditional, type="HC2")[1:p,1:p])))), "\n")
 671 |         ## ## cat("sandwich package regression se HC3:", print(sqrt(diag((vcovHC(fit.traditional, type="HC3")[1:p,1:p])))), "\n")
 672 |         ## ## cat("sandwich package regression se HC4:", print(sqrt(diag((vcovHC(fit.traditional, type="HC4")[1:p,1:p])))), "\n")
 673 |         ## ## cat("sandwich package regression se HC4m:", print(sqrt(diag((vcovHC(fit.traditional, type="HC4m")[1:p,1:p])))), "\n")
 674 |         ## ## cat("sandwich package regression se HC5:", print(sqrt(diag((vcovHC(fit.traditional, type="HC5")[1:p,1:p])))), "\n")
 675 |         
 676 | 
 677 |         ## Creating a weight verctor
 678 |         ## original index
 679 |         idx <- paste(orig.unit.idx, orig.time.idx, sep="_")
 680 |         a <- units
 681 |         b <- times
 682 |         Wv <- as.vector(W) # as vector
 683 |         a1 <- rep(as.character(a$unit), each=nrow(W))
 684 |         b1 <- rep(as.character(b$time), ncol(W))
 685 |         idxall <- paste(a1, b1, sep="_")
 686 |         idxall.sub <- idxall[which(idxall %in% idx)]
 687 |         W.it <- Wv[which(idxall %in% idx)]
 688 |         u.sub <- unlist(lapply(idxall.sub,
 689 |                                function(x) strsplit(x, "_")[[1]][1]))
 690 |         t.sub <- unlist(lapply(idxall.sub,
 691 |                                function(x) strsplit(x, "_")[[1]][2]))
 692 |         cmd <- paste("W1 <- data.frame(", unit.index, "= u.sub)", sep="")
 693 |         eval(parse(text=cmd))
 694 |         if(is.null(time.index)){
 695 |             W1$obs.idx <- t.sub
 696 |         } else {
 697 |             cmd2 <- paste("W1$", time.index, " <- t.sub", sep="")
 698 |             eval(parse(text=cmd2))
 699 |         }
 700 |         W1$W.it <- W.it
 701 | 
 702 |         ## ensuring the order reflects the original idx
 703 |         mf$W.it <- W.it[match(idxall.sub, idx)]
 704 |         u.orig <- unlist(lapply(idx,
 705 |                                 function(x) strsplit(x, "_")[[1]][1]))
 706 |         t.orig <- unlist(lapply(idx,
 707 |                                 function(x) strsplit(x, "_")[[1]][2]))
 708 |         cmd <- paste("D <- data.frame(", unit.index, "= u.orig)", sep="")
 709 |         eval(parse(text=cmd))
 710 |         if(is.null(time.index)){
 711 |             D$obs.idx <- t.orig
 712 |         } else {
 713 |             cmd2 <- paste("D$", time.index, " <- t.orig", sep="")
 714 |             eval(parse(text=cmd2))
 715 |         }
 716 |         
 717 |         mf <- cbind(D, mf)
 718 |         
 719 |         Num.nonzero <- length(which(!W1$weights==0))
 720 |         
 721 | ###Saving results
 722 | 
 723 | 
 724 |         z <- list(coefficients = coef.wls,
 725 |                   x = x[,-1,drop=FALSE],
 726 |                   y = y,
 727 |                   mf = mf,
 728 |                   call = wfe.call,
 729 |                   vcov = var.cov,
 730 |                   se = sqrt(diag(var.cov)),
 731 |                   sigma = sqrt(sigma2),
 732 |                   df = d.f,
 733 |                   residuals = y - (x[,-1,drop=FALSE] %*% coef.wls),
 734 |                   W = W1,
 735 |                   Num.nonzero = Num.nonzero,
 736 |                   uniq.n.units = J.u,
 737 |                   units = units,
 738 |                   times = times,
 739 |                   method = method,
 740 |                   causal = causal,
 741 |                   est = est,
 742 |                   std.error = std.error,
 743 |                   White.pvalue = white.p,
 744 |                   White.alpha = White.alpha,
 745 |                   White.stat = white.stat,
 746 |                   White.test = test.null,
 747 |                   Y.wdm = Y.wdm,
 748 |                   X.wdm = X.wdm)
 749 |         class(z) <- "wfe"
 750 |         z
 751 | 
 752 | ### ********************************************************
 753 | ### Two-way Weighted Fixed Effects
 754 | ### ********************************************************
 755 |     } else {
 756 |         ## if (verbose) {
 757 |         ##     did <- CalDID(data$u.index, data$t.index, data$TR, data$C.it,
 758 |         ##                   y, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose)
 759 |         ##     cat("\nMulti-period DID estimate with no covariate adjustments is", did ,"\n")
 760 |         ##     flush.console()
 761 |         ## }
 762 |         
 763 |         ## Differences-in-difference
 764 |         if(( (method=="unit") & (qoi == "ate") & (!is.null(estimator) & estimator == "did")) |
 765 |            ( (method == "unit") & (qoi =="att") & (!is.null(estimator) & estimator == "did")) |
 766 |            ( (method=="unit") & (qoi == "ate") & (!is.null(estimator) & estimator == "Mdid")) |
 767 |            ( (method == "unit") & (qoi =="att") & (!is.null(estimator) & estimator == "Mdid"))       
 768 |            ) {
 769 | 
 770 |             method <- "Weighted Two-way"
 771 |             ## Standard Fixed effect
 772 |             if (unweighted == TRUE) {
 773 |                 data$W.it <- rep(1, nrow(data))
 774 |                 W <- matrix(1,nrow=length(uniq.t), ncol=length(uniq.u))
 775 |             } else {
 776 |                 if (verbose) {
 777 |                     cat("\nWeight calculation started ")
 778 |                     flush.console()
 779 | 
 780 |                 }
 781 |                 if(estimator == "Mdid"){
 782 |                     if(is.null(maxdev.did)){
 783 |                         maxdev.did <- -1
 784 |                         if (verbose) {
 785 |                             cat(": Nearest Neighbor Matching\n")
 786 |                             flush.console()
 787 |                         }
 788 |                         
 789 |                     } else {
 790 |                         if (verbose) {
 791 |                             cat(": Matching on Pre-Treatment Outcome Within Maximum Deviation", maxdev.did,"\n")
 792 |                             flush.console()
 793 |                         }
 794 |                         
 795 |                         maxdev.did <- as.numeric(maxdev.did)
 796 |                     }
 797 |                     WDiD <- GenWeightsMDID(data$u.index, data$t.index, data$TR, data$C.it, y, maxdev.did,
 798 |                                            tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose)
 799 | 
 800 |                 } else {
 801 |                     WDiD <- GenWeightsDID(data$u.index, data$t.index, data$TR, data$C.it,
 802 |                                           tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose)
 803 |                 }
 804 |                 
 805 |                 W <- matrix(WDiD, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T)            
 806 |                 data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row)
 807 |                 
 808 |                 if (verbose) { 
 809 |                     cat("\nWeight calculation done \n")
 810 |                     flush.console()
 811 |                 }
 812 |                 
 813 |             }
 814 |             ## e <- environment()
 815 |             ## save(file = "temp.RData", list = ls(), env = e)
 816 |             
 817 |             ## creating index for sparse dummy matrix
 818 | 
 819 |             u <- as.matrix(table(data$u.index))
 820 |             
 821 |             Udummy.i <- seq(1:sum(u))
 822 |             Udummy.j <- c()
 823 | 
 824 |             for (j in 1:length(uniq.u)) {
 825 |                 Udummy.j <- c(Udummy.j, rep(j, u[j,1]))
 826 |             }
 827 |             Udummy  <- sparseMatrix(x=1, i=Udummy.i, j=Udummy.j)
 828 | 
 829 |             t <- as.matrix(table(data$u.index, data$t.index))
 830 | 
 831 |             ## checking panel structure
 832 |             if (verbose)
 833 |                 if (length(which(t==0)) > 0){
 834 |                     cat("\nUnbalanced Panel Data\n")
 835 |                 }
 836 |             if ( length(which(t>1)) > 0 ){
 837 |                 stop ("\nunit-time pair is not unique\n")
 838 |             }
 839 |             flush.console()
 840 |             
 841 |             ## this takes time: should be made more efficient
 842 |             Tdummy <- array(0,dim=c(0,length(uniq.t)))
 843 |             for (j in 1:nrow(t)) {
 844 |                 Tdummy <- rbind(Tdummy,  Diagonal(x = t[j,], n=length(uniq.t)))
 845 |             }
 846 | 
 847 |             ## when panel is unbalanced there will be rows of zeros
 848 |             zero <- which(apply(Tdummy, 1, mean) == 0)
 849 |             if(length(zero) > 0) {
 850 |                 Tdummy <- Tdummy[-zero,]
 851 |             }
 852 | 
 853 |             ## if (verbose)
 854 |             ##   cat("\n Dummy creation done \n")
 855 |             ## flush.console()
 856 | 
 857 | 
 858 | #########################################################################
 859 | ### Projection for standard twoway FE
 860 | 
 861 |             ## if (verbose)
 862 |             ##   cat("\n Standard FE Projection Started \n")
 863 |             ## flush.console()
 864 | 
 865 |             ## this step takes time
 866 |             P1 <- Udummy %*% tcrossprod(Diagonal(x=1/as.vector(table(data$u.index))), Udummy)
 867 |             
 868 |             ## e <- environment()
 869 |             ## save(file = "test.RData", list = ls(), env = e)
 870 |             
 871 |             Q1 <- Diagonal(x=1, n=nrow(Udummy)) - P1
 872 |             
 873 | 
 874 |             ## Q: not too sparse
 875 |             Q <- Q1 %*% Tdummy
 876 |             Q.QQginv <- Q %*% ginv(as.matrix(crossprod(Q)))
 877 | 
 878 |             X <- as.matrix(X)
 879 |             Y <- as.matrix(data$y)
 880 |             
 881 |             YX <- cbind(Y,X)
 882 |             
 883 |             Data.2wdm <- as.data.frame(as.matrix(YX - P1%*%YX - Q.QQginv %*% crossprod(Q,YX)))
 884 |             colnames(Data.2wdm) <- colnames(mf.sorted)
 885 | 
 886 |             rm(YX, P1, Q.QQginv, Q)
 887 |             gc()
 888 | 
 889 |             ## if (verbose)
 890 |             ##   cat("\n Standard FE Projection done \n")
 891 |             ## flush.console()
 892 |             
 893 |             a <- unlist(strsplit(as.character(formula), "~"))
 894 |             formula.ni <- as.formula(paste(a[2], "~ -1 + ",  a[3]))
 895 | 
 896 | 
 897 |             ## final regression on 2way demeaned data
 898 |             fit.ols <- lm(formula.ni, data = Data.2wdm)
 899 |             
 900 |             coef.ols <- fit.ols$coef
 901 |             resid.ols <- resid(fit.ols)
 902 | 
 903 |             u.hat <- as.matrix(resid.ols)
 904 |             X.hat <- as.matrix(Data.2wdm[,-1])
 905 |             rm(Data.2wdm)
 906 |             gc()
 907 |             
 908 |             
 909 | ############################################################
 910 | 
 911 |             
 912 |             ## subset observations with non-zero weights
 913 |             if (White == TRUE){
 914 |                 nz.index <- seq(1,tn.row)
 915 |             } else { # cannot calculate White statistics 
 916 |                 ## exclude zero-weights observations for efficient calculation
 917 |                 nz.index <- data$W.it !=0
 918 |                 tn.row <- length(which(data$W.it !=0))
 919 |             }
 920 | 
 921 |             nz.obs <- sum(as.numeric(data$W.it !=0))
 922 |             
 923 |             X <- as.matrix(X)[nz.index,]
 924 |             Y <- as.matrix(data$y)[nz.index]
 925 | 
 926 | 
 927 |             ## removing zero weights rows
 928 |             Udummy <- Udummy[nz.index,]
 929 |             Tdummy <- Tdummy[nz.index,]
 930 | 
 931 | 
 932 | ### removing zero columns for full-rank (after deleting zero weights observations)
 933 |             
 934 |             ## 1. Unit dummies
 935 |             
 936 |             u.zero <- try(which(as(apply(Udummy, 2, sum), "sparseVector") == 0), silent=TRUE)
 937 | 
 938 |             if (class(u.zero) == "try-error") {
 939 |                 u.zero <- c()
 940 |                 for (i in 1:ncol(Udummy)) {
 941 |                     if (sum(Udummy[,i]) == 0){
 942 |                         temp <- i
 943 |                         u.zero <- c(u.zero, temp)
 944 |                     }
 945 |                 }
 946 |                 if (verbose)
 947 |                     cat("\nReached Memory Limit: White == FALSE option is recommended\n")
 948 |                 flush.console()
 949 | 
 950 |                 if (length(u.zero) > 0) {
 951 |                     Udummy <- Udummy[,-u.zero]
 952 |                     gc()
 953 |                 }
 954 |             } else {
 955 |                 if (length(u.zero) > 0) {
 956 |                     Udummy <- Udummy[,-u.zero]
 957 |                     gc()
 958 |                 }
 959 |             }
 960 |             
 961 |             ## if (verbose)
 962 |             ##   cat("\n Udummy done\n")
 963 |             ## flush.console()
 964 | 
 965 | 
 966 |             ## 2. Time dummies
 967 | 
 968 |             if (length(which(as(apply(Tdummy, 2, sum), "sparseVector")==0)) > 0) {
 969 |                 zero <- which(as(apply(Tdummy, 2, sum), "sparseVector") == 0)
 970 |                 n.zero <- length(zero)
 971 |                 Tdummy <- Tdummy[,-zero]
 972 |                 ## delete last column
 973 |                 ## last <- ncol(Tdummy) 
 974 |                 ## Tdummy <- Tdummy[,-last]
 975 |                 ## adding a column of 1000's for numerical stability of ginv
 976 |                 Tdummy <- cbind(Tdummy, rep(1000, nrow(Tdummy)))
 977 |                 gc()
 978 |             } else {
 979 |                 last <- ncol(Tdummy)
 980 |                 Tdummy <- Tdummy[,-last] # for identification exclude the last year dummy
 981 |                 ## adding a column of 1's for numerical stability of ginv
 982 |                 Tdummy <- cbind(Tdummy, rep(1000, nrow(Tdummy)))
 983 |                 gc()
 984 |             }
 985 | 
 986 |             ## if (verbose)
 987 |             ##   cat("\n Tdummy done\n")
 988 |             ## flush.console()
 989 | 
 990 |             ## number of columns for Unit/Time dummy matrix
 991 |             n.Udummy <- ncol(Udummy)
 992 |             n.Tdummy <- ncol(Tdummy)
 993 | 
 994 |             
 995 |             ## combining unit dummy matrix and X matrix
 996 |             D <- Matrix(cbind(Udummy, Tdummy))
 997 |             ## Note: Tdummy part does not have zero columns, but Udummy part has it
 998 |             ## will be addressed thie issue below by n.zero
 999 |             
1000 |             ## final number of dummies
1001 |             fn.dummies <- ncol(D) # final number of dummies
1002 | 
1003 | 
1004 | 
1005 | ### Projection onto Complex-plane
1006 | 
1007 |             
1008 |             ## if (verbose)
1009 |             ##   cat("\n Calculation for Projection Matrix Started \n")
1010 |             ## flush.console()
1011 | 
1012 |             ## sqrt of weights (imaginary numbers)
1013 |             Im.sqrt <- function(weights) {
1014 |                 if (weights >= 0) # real number
1015 |                     im.w <- complex(real=sqrt(weights), imaginary =0)
1016 |                 if (weights < 0) # imaginary number
1017 |                     im.w <- complex(real=0, imaginary = sqrt(-weights))
1018 |                 invisible(im.w)
1019 |             }
1020 | 
1021 |             ## vector of sqrt(W.it)
1022 |             w.sqrt <- sapply(data$W.it[nz.index], Im.sqrt)
1023 |             
1024 | 
1025 | #########################################################################
1026 | 
1027 |             
1028 | ### Matrix multiplication: two sparse matrix : A=R1+I1i, B=R2+I2i
1029 | 
1030 |             ## A%*%B: result is a list where [[1]] is real part [[2]] is imaginary part of complex matrix multiplication
1031 |             Sparse_compMatrixMultiply <- function(R1,I1,R2,I2) {
1032 |                 result <- list()
1033 |                 result[[1]] <- drop0(R1%*%R2) - drop0(I1%*%I2)
1034 |                 result[[2]] <- drop0(R1%*%I2) + drop0(I1%*%R2)
1035 |                 result
1036 |             }
1037 | 
1038 |             ## A %*% t(B): result is a list where [[1]] is real part [[2]] is imaginary part of complex matrix multiplication
1039 |             Sparse_compMatrix_tcrossprod <- function(R1,I1,R2,I2) {
1040 |                 ## real part
1041 |                 result <- list()
1042 |                 result[[1]] <- drop0(tcrossprod(R1, R2)) - drop0(tcrossprod(I1, I2))
1043 |                 result[[2]] <- drop0(tcrossprod(R1, I2)) + drop0(tcrossprod(I1, R2))
1044 |                 result
1045 |             }
1046 | 
1047 |             ## t(A) %*% B: result is a list where [[1]] is real part [[2]] is imaginary part of complex matrix multiplication
1048 |             Sparse_compMatrix_crossprod <- function(R1,I1,R2,I2) {
1049 |                 ## real part
1050 |                 result <- list()
1051 |                 result[[1]] <- drop0(crossprod(R1, R2)) - drop0(crossprod(I1, I2))
1052 |                 result[[2]] <- drop0(crossprod(R1, I2)) + drop0(crossprod(I1, R2))
1053 |                 result
1054 |             }
1055 | 
1056 | 
1057 |             R1 <- Diagonal(x = Re(w.sqrt), n=tn.row)
1058 |             I1 <- Diagonal(x = Im(w.sqrt), n=tn.row)
1059 | 
1060 |             yL <- list()
1061 |             yL[[1]] <- Matrix(Y)
1062 |             yL[[2]] <- Matrix(0, nrow=nrow(yL[[1]]),  ncol=ncol(yL[[1]]))
1063 |             y.starL <- Sparse_compMatrixMultiply(R1,I1, yL[[1]], yL[[2]])
1064 |             rm(yL)
1065 |             gc()
1066 |             
1067 |             xL <- list()
1068 |             xL[[1]] <- Matrix(X)
1069 |             xL[[2]] <- Matrix(0, nrow=nrow(xL[[1]]),  ncol=ncol(xL[[1]]))
1070 |             x.starL <- Sparse_compMatrixMultiply(R1,I1, xL[[1]], xL[[2]])
1071 |             rm(xL)
1072 |             gc()
1073 | 
1074 | #########################################################################
1075 |             
1076 |             ## create D1, and D2 
1077 |             R2 <- Matrix(D)
1078 |             I2 <- Matrix(0, nrow=nrow(D), ncol=ncol(D))
1079 | 
1080 |             D.starL <- Sparse_compMatrixMultiply(R1,I1,R2,I2)
1081 | 
1082 |             ## e <- environment()
1083 |             ## save(file = "temp.RData", list = ls(), env = e)
1084 |             
1085 |             rm(R2, I2)
1086 |             gc()
1087 | 
1088 | 
1089 |             D1.starL <- D2.starL <- list()
1090 |             
1091 |             ## e <- environment()
1092 |             ## save(file = "temp.RData", list = ls(), env = e, compress = TRUE)
1093 |             
1094 | 
1095 |             D1.starL[[1]] <- drop0(D.starL[[1]][,1:n.Udummy])
1096 |             D1.starL[[2]] <- drop0(D.starL[[2]][,1:n.Udummy])
1097 |             D2.starL[[1]] <- drop0(D.starL[[1]][,((n.Udummy+1):(n.Udummy+n.Tdummy))])
1098 |             D2.starL[[2]] <- drop0(D.starL[[2]][,((n.Udummy+1):(n.Udummy+n.Tdummy))])
1099 |             ## cat("Number of columns for D1", n.Udummy, "\n")
1100 |             ## cat("Number of columns for D2", n.Tdummy, "\n")
1101 |             
1102 |             rm(D.starL)
1103 |             gc()
1104 | #########################################################################
1105 |             
1106 |             ## sum of sqrt(W.it) across years for each dyad
1107 |             general.inv <- function(weight, tol = tol){
1108 |                 if(abs(weight) < tol) {
1109 |                     out <- 0
1110 |                 } else {
1111 |                     out <- 1/weight
1112 |                 }
1113 |                 out
1114 |             }
1115 |             
1116 |             sum.sqrtW <- complex(real=apply(Sparse_compMatrixMultiply(R1,I1,D1.starL[[1]],D1.starL[[2]])[[1]], 2, sum), imaginary=rep(0, length(J.u)))
1117 |             rm(R1, I1)
1118 |             gc()
1119 | 
1120 | 
1121 |             inv.weight <- sapply(sum.sqrtW, general.inv, tol)
1122 | 
1123 |             rm(sum.sqrtW)
1124 |             gc()
1125 |             
1126 |             ## e <- environment()
1127 |             ## save(file = "temp.RData", list = ls(), env = e)
1128 |             
1129 |             ginvW <- list()
1130 |             ginvW[[1]] <- Diagonal(x = Re(inv.weight))
1131 |             ginvW[[2]] <- Diagonal(x = Im(inv.weight))
1132 | 
1133 |             rm(inv.weight)
1134 |             gc()
1135 | 
1136 |             Dginv <- Sparse_compMatrixMultiply(D1.starL[[1]], D1.starL[[2]], ginvW[[1]], ginvW[[2]])
1137 |             rm(ginvW)
1138 |             gc()
1139 | 
1140 |             P1L <-  Sparse_compMatrix_tcrossprod(Dginv[[1]], Dginv[[2]], D1.starL[[1]], D1.starL[[2]])
1141 | 
1142 |             rm(Dginv, D1.starL)
1143 |             gc()
1144 | 
1145 |             ## if (verbose) {
1146 |             ##   cat("\n P1 created\n")
1147 |             ##   flush.console()      
1148 |             ## }
1149 |             
1150 |             Q1L <- list()
1151 |             Q1L[[1]] <- drop0(Diagonal(x=1, n=nrow(P1L[[1]])) - P1L[[1]])
1152 |             Q1L[[2]] <- drop0(Diagonal(x=0, n=nrow(P1L[[1]])) - P1L[[2]])
1153 | 
1154 |             ## if (verbose) {
1155 |             ##   cat("\n Q1 created\n")
1156 |             ##   flush.console()
1157 |             ## }
1158 |             
1159 |             Q <- try(Sparse_compMatrixMultiply(Q1L[[1]], Q1L[[2]], D2.starL[[1]], D2.starL[[2]]), silent = TRUE)
1160 |             if ((class(Q) == "try-error") & (White == TRUE)) {
1161 |                 stop ("Insufficient memory. White = FALSE option is needed")
1162 |             }
1163 |             rm(Q1L, D2.starL)
1164 |             gc()
1165 | 
1166 |             
1167 |             Q.matrix <- matrix(complex(real=as.matrix(Q[[1]]), imaginary=as.matrix(Q[[2]])), nrow=nrow(Q[[1]]))
1168 | 
1169 |             QQ.inv <- list()
1170 |             QQ.inv[[1]] <- drop0(Matrix(Re(ginv(crossprod(Q.matrix)))))
1171 |             QQ.inv[[2]] <- drop0(Matrix(Im(ginv(crossprod(Q.matrix)))))
1172 |             rm(Q.matrix)
1173 |             gc()
1174 | 
1175 |             PL <- list()
1176 | 
1177 |             Q.QQinv <- Sparse_compMatrixMultiply(Q[[1]], Q[[2]], QQ.inv[[1]], QQ.inv[[2]]) 
1178 |             rm(QQ.inv)
1179 |             gc()
1180 |             
1181 |             ## if (verbose) {
1182 |             ##   cat("\n Q.QQinv created\n")
1183 |             ##   flush.console()
1184 |             ## }
1185 | 
1186 | #########################################################################
1187 | ### Fast Projection in R
1188 | 
1189 | 
1190 |             YX.starL <- list()
1191 |             YX.starL[[1]] <- cbind(y.starL[[1]], x.starL[[1]])
1192 |             YX.starL[[2]] <- cbind(y.starL[[2]], x.starL[[2]])
1193 |             rm(y.starL, x.starL)
1194 |             gc()
1195 |             
1196 |             
1197 |             P1.YX <- Sparse_compMatrixMultiply(P1L[[1]], P1L[[2]], YX.starL[[1]], YX.starL[[2]])
1198 |             rm(P1L)
1199 |             gc()
1200 | 
1201 | 
1202 |             Q.YX <- Sparse_compMatrix_crossprod(Q[[1]], Q[[2]], YX.starL[[1]], YX.starL[[2]])
1203 |             rm(Q)
1204 |             gc()
1205 |             
1206 |             QQQQ.YX <- Sparse_compMatrixMultiply(Q.QQinv[[1]], Q.QQinv[[2]], Q.YX[[1]], Q.YX[[2]])
1207 |             
1208 |             ## cat("dimension of P1.YX:", dim(P1.YX[[1]]), "\n")
1209 |             ## cat("dimension of QQQQ.YX:", dim(QQQQ.YX[[1]]), "\n")
1210 | 
1211 |             ## TransformedL <- list()
1212 |             ## TransformedL[[1]] <- YX.starL[[1]] - P1.YX[[1]] - P2.YX[[1]]
1213 |             ## TransformedL[[2]] <- YX.starL[[2]] - P1.YX[[2]] - P2.YX[[2]]
1214 | 
1215 |             Transformed <- matrix(complex(real=as.vector(YX.starL[[1]] - P1.YX[[1]] - QQQQ.YX[[1]]), imaginary=as.vector(YX.starL[[2]] - P1.YX[[2]] - QQQQ.YX[[2]])), nrow=tn.row)
1216 |             rm(YX.starL, P1.YX, QQQQ.YX)
1217 |             
1218 |             y.tilde <- Transformed[,1]
1219 |             X.tilde <- as.matrix(Transformed[,-1])
1220 | 
1221 |             ## save weighted demeaned dataframe
1222 |             if (store.wdm == TRUE){
1223 |                 Y.wdm <- y.tilde
1224 |                 X.wdm <- X.tilde
1225 |             } else {
1226 |                 Y.wdm <- NULL
1227 |                 X.wdm <- NULL
1228 |             }
1229 | 
1230 |             rm(Transformed)
1231 |             gc()
1232 |             
1233 |             if (ncol(X.tilde) == 1) {
1234 |                 colnames(X.tilde) <- a[3]
1235 |             } else {
1236 |                 colnames(X.tilde) <- colnames(X)
1237 |             }
1238 | 
1239 |             ginv.XX.tilde <- ginv(crossprod(X.tilde))
1240 |             betaT <- ginv.XX.tilde%*% crossprod(X.tilde, y.tilde)
1241 |             if (length(betaT) == 1) {
1242 |                 colnames(betaT) <- a[3]
1243 |             }
1244 |             ## print(betaT)
1245 |             coef.wls <- matrix(as.double(Re(betaT)))
1246 |             rownames(coef.wls) <- colnames(X.tilde)
1247 |             
1248 |             ## e <- environment()
1249 |             ## save(file = "temp.RData", list = ls(), env = e)
1250 |             
1251 |             ## weighted residuals
1252 |             e.tilde <- (y.tilde - X.tilde %*% betaT)
1253 |             colnames(e.tilde) <- "e.tilde"
1254 |             ## true residuals
1255 |             resid <- try(1/w.sqrt * (y.tilde - X.tilde %*% betaT))
1256 | 
1257 |             ## in case zero weights observations are not excluded
1258 |             if (White == TRUE){
1259 |                 if (sum(as.numeric(w.sqrt==0)) > 0) { 
1260 |                     zero.index <- data$W.it ==0
1261 |                     resid[zero.index] <- 0
1262 |                 }
1263 |             }
1264 |             rm(w.sqrt)
1265 |             gc()
1266 |             
1267 |             ## check residuals      
1268 |             ## print(cbind(as.matrix(true.resid), e.tilde, data$W.it))
1269 |             ## print(cbind(sum(true.resid^2), sum(e.tilde^2)))
1270 | 
1271 | 
1272 |             ## diag.ee.tilde <- diag(tcrossprod(e.tilde,e.tilde))
1273 |             ## diag.resid <- as.vector(resid * resid)
1274 |             diag.ee.tilde <- as.vector(e.tilde * e.tilde)
1275 |             
1276 | #########################################################################
1277 | 
1278 |             ## cat("dimension of X.tilde:", dim(X.tilde), "\n")
1279 |             
1280 |             ## XX.hat <- crossprod(X.hat, X.hat)
1281 |             ginv.XX.hat <- ginv(crossprod(X.hat, X.hat))
1282 |             ## d.f <- length(y.tilde) - n.Udummy - n.Tdummy - dim(X.tilde)[2]
1283 |             d.f <- length(y.tilde)
1284 | 
1285 |             ## cat("Sum of squared residuals:", sum(resid^2), "\n")
1286 |             sigma2 <- as.double(Re(sum(resid^2)/d.f))
1287 |             
1288 |             ## Remove observations with zero weights
1289 |             ## data backup
1290 |             data.zero <- data
1291 |             zero.ind <- which(data$W.it==0)
1292 |             if(length(zero.ind) > 0){
1293 |                 data.nonzero <- data[-zero.ind, ]
1294 |             } else {
1295 |                 data.nonzero <- data
1296 |             }
1297 |             n.units <- length(unique(data$u.index))
1298 |             n.times <- length(unique(data$t.index))
1299 | 
1300 |             Mstar <- nrow(data.nonzero)            
1301 |             if (verbose)
1302 |                 cat("\nTotal number of observations with non-zero weight:", Mstar,"\n")
1303 |             flush.console()
1304 |             
1305 |             if(unweighted == FALSE){
1306 |                 n.nonzero.units <- length(unique(data.nonzero$u.index))
1307 |                 n.nonzero.times <- length(unique(data.nonzero$t.index))
1308 |             } else {
1309 |                 n.nonzero.units <- n.units
1310 |                 n.nonzero.times <- n.times
1311 |             }
1312 | 
1313 |             x.vars <- colnames(x)
1314 |             x.vars <- x.vars[-grep("Intercept", x.vars)]
1315 |             nK <- length(x.vars) 
1316 |             variables <- c("y", x.vars)
1317 | 
1318 |             ## e <- environment()
1319 |             ## save(file = "temp.RData", list = ls(), env = e)
1320 |             
1321 | 
1322 |             ## #######################################################################            
1323 |             ## (Robust) standard errors (GMM asymptotic variance for wfe)
1324 |             ## calculating GMM standard errors
1325 |             ## #######################################################################
1326 | 
1327 |             if ((hetero.se == TRUE) & (auto.se == TRUE)){
1328 | 
1329 |                 ## 1. arbitrary autocorrelation as well as heteroskedasticity (Eq 12)
1330 |                 std.error <- "Heteroscedastic / Autocorrelation Robust Standard Error"
1331 |                 ## stop ("Robust standard errors with autocorrelation is currently not supported")
1332 | 
1333 |                 ## ## MeatHAC takes unit level data with T_i rows and
1334 |                 ## ## compute t(X_i) %*% e_i %*% t(e_i) %*% X_i
1335 |                 ## MeatHAC <- function(x){
1336 |                 ##     Xtilde <- as.matrix(x[, -c(1,2)]) # removing u.index, e.tilde
1337 |                 ##     etilde <- x[,2]
1338 |                 ##     Meat <- t(Xtilde) %*% etilde %*% t(etilde) %*% Xtilde
1339 |                 ##     Meat <- matrix(Meat, ncol=ncol(Xtilde), nrow=ncol(Xtilde))
1340 |                 ##     return(Meat)
1341 |                 ## }
1342 | 
1343 |                 ## ## prepare data for vcov calculation & remove units with zero weights
1344 |                 ## D.tilde <- data.frame(u.index = data[,c("u.index")], e.tilde, X.tilde)
1345 |                 ## colnames(D.tilde)[1] <- c("u.index")
1346 |                 
1347 |                 ## if(length(zero.ind)>0){
1348 |                 ##     D.tilde <- D.tilde[-zero.ind,]
1349 |                 ##     e.tilde <- e.tilde[-zero.ind]
1350 |                 ## } else {
1351 |                 ##     D.tilde <- D.tilde
1352 |                 ##     e.tilde <- e.tilde
1353 |                 ## }
1354 | 
1355 | 
1356 |                 ## -----------------------------------------------------
1357 |                 ## vcov matrix for WFE 
1358 |                 ## -----------------------------------------------------
1359 | 
1360 |                 ## degrees of freedom adjustment
1361 |                 df_wfe2 <- (Mstar/(Mstar-1))*((Mstar-nK)/(Mstar- n.nonzero.units - n.nonzero.times - nK))
1362 |                 
1363 |                 ## ## meat part
1364 |                 ## XeeX <- lapply(split(D.tilde,D.tilde$u.index), MeatHAC)
1365 |                 ## XeeX.wfe <- matrix(0, nrow=nK, ncol=nK)
1366 |                 ## ## add unit level vcov
1367 |                 ## for(g in 1:n.nonzero.units){
1368 |                 ##     XeeX.wfe <- XeeX.wfe + XeeX[[g]]
1369 |                 ## }
1370 | 
1371 |                 ## ## sandwich estimator
1372 |                 ## Psi.hat.wfe2 <- df_wfe2 * (ginv.XX.tilde %*% XeeX.wfe %*% ginv.XX.tilde)
1373 |                 ## print(Psi.hat.wfe2)
1374 | 
1375 |                 XeeX <- as.double(comp_OmegaHAC(c(X.tilde), e.tilde, c(X.tilde), e.tilde,
1376 |                                                 dim(X.tilde)[1], dim(X.tilde)[2], data$u.index, J.u))
1377 |                 XeeX.wfe <- matrix(XeeX, nrow=ncol(X.tilde), ncol=ncol(X.tilde), byrow=T)
1378 |                 
1379 |                 Psi.hat.wfe <- df_wfe2*((ginv.XX.tilde %*% XeeX.wfe %*% ginv.XX.tilde))
1380 | 
1381 |                 ## print(Psi.hat.wfe)
1382 |                 
1383 |                 ## -----------------------------------------------------
1384 |                 ## vcov matrix for FE for White statistics calculation
1385 |                 ## -----------------------------------------------------
1386 | 
1387 |                 if (White == TRUE){
1388 | 
1389 |                     ## degrees of freedom adjustment                    
1390 |                     df_fe2 <- (nrow(X.hat)/(nrow(X.hat)-1))*((nrow(X.hat)-nK)/(nrow(X.hat)-n.units-n.times-nK))
1391 |                     
1392 |                     ## ## prepare data for vcov calculation
1393 |                     ## D.hat <- data.frame(u.index = data[,c("u.index")], u.hat, X.hat)
1394 |                     ## colnames(D.hat)[1] <- c("u.index")
1395 |                     
1396 |                     ## ## SE calcluation
1397 |                     ## XeeX <- lapply(split(D.hat,D.hat$u.index), MeatHAC)
1398 |                     ## XeeX.fe <- matrix(0, nrow=nK, ncol=nK)
1399 |                     ## ## add unit level vcov
1400 |                     ## for(g in 1:n.units){
1401 |                     ##     XeeX.fe <- XeeX.fe + XeeX[[g]]
1402 |                     ## }
1403 | 
1404 |                     ## Psi.hat.fe <- df_fe2 * (ginv.XX.hat %*% XeeX.fe %*% ginv.XX.hat)
1405 |                     ## print(Psi.hat.fe)
1406 |                     
1407 |                     XeeX <- OmegaHatHAC(nrow(X.hat), ncol(X.hat), data$u.index, J.u, X.hat, u.hat)
1408 |                     XeeX.fe <- matrix(XeeX, nrow = ncol(X.hat), ncol = ncol(X.hat))
1409 | 
1410 |                     Psi.hat.fe <- df_fe2 * (ginv.XX.hat %*% XeeX.fe %*% ginv.XX.hat)
1411 |                     ## print(Psi.hat.fe)
1412 | 
1413 |                     ## storing standard errors                    
1414 |                     var.cov.fe <- Psi.hat.fe
1415 |                     se.ols <- sqrt(diag(var.cov.fe))
1416 |                     ## print(Psi.hat.fe)
1417 |                     
1418 |                 }
1419 | 
1420 | 
1421 |             } else if ( (hetero.se == TRUE) & (auto.se == FALSE)) {
1422 |                 stop("Please set hetero.se == TRUE & auto.se == TRUE when you run two-way FE")
1423 |             } else if ( (hetero.se == FALSE) & (auto.se == FALSE) ) {# indepdence and homoskedasticity
1424 |                 stop("Please set hetero.se == TRUE & auto.se == TRUE when you run two-way FE")
1425 |             } else if ( (hetero.se == FALSE) & (auto.se == TRUE) ) {# Kiefer
1426 |                 stop ("Robust standard errors with autocorrelation and homoskedasiticy is not supported")
1427 |             }
1428 |             
1429 |             ## storing standard errors
1430 |             vcov.wfe <- Psi.hat.wfe 
1431 |             se.did <- as.double(Re(sqrt(diag(vcov.wfe))))
1432 |             
1433 |             if (verbose) {
1434 |               cat("\nStd.error calculation done\n")
1435 |               flush.console()
1436 |             }
1437 | 
1438 | ### White (1980) Test: Theorem 4
1439 | 
1440 |             if (White == TRUE){
1441 | 
1442 |                 df.white <- (nrow(X.hat)-nK)/(Mstar- n.nonzero.units - n.nonzero.times - nK)
1443 |                 
1444 |                 ## ## MeatHAC_White takes unit level data with T_i rows and
1445 |                 ## ## compute t(X_i) %*% e1_i %*% t(e2_i) %*% X_i
1446 |                 ## MeatHAC_White <- function(x,y){
1447 |                 ##     X1 <- as.matrix(x[, -c(1,2)]) # removing u.index, e.tilde
1448 |                 ##     X2 <- as.matrix(y[, -c(1,2)]) # removing u.index, e.tilde, e.hat
1449 |                 ##     e1 <- x[,2]
1450 |                 ##     e2 <- y[,2]
1451 |                 ##     Meat <- t(X1) %*% e1 %*% t(e2) %*% X2
1452 |                 ##     Meat <- matrix(Meat, ncol=ncol(X1), nrow=ncol(X1))
1453 |                 ##     ## print(Meat)
1454 |                 ##     return(Meat)
1455 |                 ## }
1456 |                 
1457 |                 ## ## -----------------------------------------------------
1458 |                 ## ## cov term esitmate for beta_fe2 - beta_wfe2
1459 |                 ## ## -----------------------------------------------------
1460 |                 
1461 |                 ## ## meat part
1462 |                 ## XeeX1 <- mapply(MeatHAC_White, split(D.tilde,D.tilde$u.index), split(D.hat,D.hat$u.index))
1463 |                 ## XeeX2 <- mapply(MeatHAC_White, split(D.hat,D.hat$u.index), split(D.tilde,D.tilde$u.index))
1464 |                 
1465 |                 ## Meat1 <- matrix(0, nrow=nK, ncol=nK)
1466 |                 ## Meat2 <- matrix(0, nrow=nK, ncol=nK)
1467 |                 ## ## add unit level vcov
1468 |                 ## for(g in 1:n.nonzero.units){
1469 |                 ##     if(nK==1){
1470 |                 ##         Meat1 <- Meat1 + matrix(XeeX1[g], ncol=nK, nrow=nK)
1471 |                 ##         Meat2 <- Meat2 + matrix(XeeX2[g], ncol=nK, nrow=nK)
1472 |                 ##     } else {
1473 |                 ##         Meat1 <- Meat1 + matrix(XeeX1[,g], ncol=nK, nrow=nK)
1474 |                 ##         Meat2 <- Meat2 + matrix(XeeX2[,g], ncol=nK, nrow=nK)
1475 |                 ##     }
1476 |                 ## }
1477 |                 
1478 |                 ## cov.term <- df.white*( (ginv.XX.tilde %*% Meat1 %*% ginv.XX.hat) + (ginv.XX.hat %*% Meat2 %*% ginv.XX.tilde))
1479 |                 ## print(cov.term)
1480 |                 
1481 |                 meat1 <- as.double(comp_OmegaHAC(c(X.tilde), e.tilde, c(X.hat), u.hat,
1482 |                                                  dim(X.tilde)[1], dim(X.hat)[2], data$u.index, J.u))
1483 |                 Meat1 <- matrix(meat1, nrow=ncol(X.tilde), ncol=ncol(X.tilde), byrow=T)
1484 |                 
1485 |                 meat2 <- as.double(comp_OmegaHAC(c(X.hat), u.hat, c(X.tilde), e.tilde,
1486 |                                                  dim(X.hat)[1], dim(X.tilde)[2], data$u.index, J.u))
1487 |                 Meat2 <- matrix(meat2, nrow=ncol(X.tilde), ncol=ncol(X.tilde), byrow=T)
1488 | 
1489 |                 cov.term <- df.white*( (ginv.XX.tilde %*% Meat1 %*% ginv.XX.hat) + (ginv.XX.hat %*% Meat2 %*% ginv.XX.tilde))
1490 |                 ## print(cov.term)
1491 |                 
1492 |                 Phi.hat <- Psi.hat.wfe + Psi.hat.fe - cov.term
1493 | 
1494 |                 ## -----------------------------------------------------
1495 |                 ## White test: null hypothesis is ``no misspecification''
1496 |                 ## -----------------------------------------------------
1497 | 
1498 |                 white.stat <- as.double(Re(t(coef.ols - coef.wls) %*% ginv(Phi.hat) %*% (coef.ols - coef.wls)))
1499 |                 
1500 |                 test.null <- pchisq(as.numeric(white.stat), df=nK, lower.tail=F) < White.alpha
1501 |                 white.p <- pchisq(as.numeric(white.stat), df=nK, lower.tail=F)
1502 | 
1503 |                 ## e <- environment()
1504 |                 ## save(file = "temp.RData", list = ls(), env = e)
1505 |                 
1506 |                 flush.console()
1507 | 
1508 |                 if (verbose) {
1509 |                   cat("\nWhite calculation done\n")
1510 |                   flush.console()
1511 |                 }
1512 | 
1513 |                 
1514 |             } else {
1515 |                 white.stat <- "NULL"
1516 |                 test.null <- "NULL"
1517 |                 white.p <- "NULL"
1518 |             }
1519 |             
1520 | 
1521 |             ## Creating a weight verctor
1522 |             ## original index
1523 |             idx <- paste(orig.unit.idx, orig.time.idx, sep="_")
1524 |             a <- units
1525 |             b <- times
1526 |             Wv <- as.vector(W) # as vector
1527 |             a1 <- rep(as.character(a$unit), each=nrow(W))
1528 |             b1 <- rep(as.character(b$time), ncol(W))
1529 |             idxall <- paste(a1, b1, sep="_")
1530 |             idxall.sub <- idxall[which(idxall %in% idx)]
1531 |             W.it <- Wv[which(idxall %in% idx)]
1532 |             u.sub <- unlist(lapply(idxall.sub,
1533 |                                    function(x) strsplit(x, "_")[[1]][1]))
1534 |             t.sub <- unlist(lapply(idxall.sub,
1535 |                                    function(x) strsplit(x, "_")[[1]][2]))
1536 | 
1537 |             cmd <- paste("W1 <- data.frame(", unit.index, "= u.sub)", sep="")
1538 |             eval(parse(text=cmd))
1539 |             if(is.null(time.index)){
1540 |                 W1$obs.idx <- t.sub
1541 |             } else {
1542 |                 cmd2 <- paste("W1$", time.index, " <- t.sub", sep="")
1543 |                 eval(parse(text=cmd2))
1544 |             }
1545 |             W1$W.it <- W.it
1546 | 
1547 |             ## ensuring the order reflects the original idx
1548 |             mf$W.it <- W.it[match(idxall.sub, idx)]
1549 |             u.orig <- unlist(lapply(idx,
1550 |                                     function(x) strsplit(x, "_")[[1]][1]))
1551 |             t.orig <- unlist(lapply(idx,
1552 |                                     function(x) strsplit(x, "_")[[1]][2]))
1553 | 
1554 |             cmd <- paste("D <- data.frame(", unit.index, "= u.orig)", sep="")
1555 |             eval(parse(text=cmd))
1556 |             if(is.null(time.index)){
1557 |                 D$obs.idx <- t.orig
1558 |             } else {
1559 |                 cmd2 <- paste("D$", time.index, " <- t.orig", sep="")
1560 |                 eval(parse(text=cmd2))
1561 |             }
1562 | 
1563 |             mf <- cbind(D, mf)
1564 |             Num.nonzero <- length(which(!W1$weights==0))
1565 | 
1566 |             
1567 |             
1568 | ### Saving results
1569 | 
1570 |             z <- list(coefficients = coef.wls,
1571 |                       x = x[,-1,drop=FALSE],
1572 |                       y = y,
1573 |                       mf = mf,
1574 |                       call = wfe.call,
1575 |                       vcov = as.numeric(vcov.wfe),
1576 |                       se = se.did,
1577 |                       sigma = try(sqrt(sigma2)),
1578 |                       df = d.f,
1579 |                       residuals = y - (x[,-1,drop=FALSE] %*% coef.wls),
1580 |                       W = W1,
1581 |                       Num.nonzero = Num.nonzero,
1582 |                       units = units,
1583 |                       times = times,
1584 |                       method = method,
1585 |                       causal = causal,
1586 |                       est = est,
1587 |                       std.error = std.error,
1588 |                       White.pvalue = white.p,
1589 |                       White.alpha = White.alpha,
1590 |                       White.stat = white.stat,
1591 |                       White.test = test.null,
1592 |                       Y.wdm = Y.wdm,
1593 |                       X.wdm = X.wdm)
1594 | 
1595 |             class(z) <- "wfedid"
1596 |             z
1597 |         }
1598 | 
1599 |     }
1600 | }
1601 | 
1602 | 
1603 | 
1604 | 
1605 | ### print wfe class
1606 | 
1607 | print.wfe <- function(x,...){
1608 |     cat("Call:\n")
1609 |     print(x$call)
1610 |     cat("\nCoefficients:\n")
1611 |     print(x$coefficients)
1612 |     cat("\nStd.Err:\n")
1613 |     print(x$se)
1614 | }
1615 | 
1616 | summary.wfe <- function(object, signif.stars = getOption("show.signif.stars"),...){
1617 |     se <- object$se
1618 |     sigma <- object$sigma
1619 |     df <- object$df
1620 |     tval <- try(coef(object) / se)
1621 |     TAB <- cbind(Estimate = coef(object),
1622 |                  Std.Error = se,
1623 |                  t.value = tval,
1624 |                  p.value = 2*pt(-abs(tval), df = object$df))
1625 |     res <- list(call = object$call,
1626 |                 coefficients = TAB,
1627 |                 sigma = object$sigma,
1628 |                 df = object$df,
1629 |                 W = object$W,
1630 |                 Num.nonzero = object$Num.nonzero,
1631 |                 units = object$units,
1632 |                 times = object$times,
1633 |                 residuals = object$residuals,
1634 |                 method = object$method,
1635 |                 causal = object$causal,
1636 |                 estimator = object$est,
1637 |                 std.error = object$std.error,
1638 |                 White.pvalue = object$White.pvalue,
1639 |                 White.alpha = object$White.alpha,
1640 |                 White.stat = object$White.stat,
1641 |                 White.test = object$White.test,
1642 |                 Y = object$y,
1643 |                 X = object$x,
1644 |                 Y.wdm = object$Y.wdm,
1645 |                 X.wdm = object$X.wdm              
1646 |                 )
1647 |     class(res) <- "summary.wfe"
1648 |     res
1649 | }
1650 | 
1651 | print.summary.wfe <- function(x, ...){
1652 |     cat("\nMethod:", x$method, "Fixed Effects\n")
1653 |     cat("\nQuantity of Interest:", x$causal)
1654 |     cat("\nEstimator:", x$estimator)
1655 |     cat("\nStandard Error:", x$std.error)
1656 |     cat("\n")
1657 |     cat("\n")
1658 |     cat("Call:\n")
1659 |     print(x$call)
1660 |     cat("\n")
1661 |     cat("Coefficients:\n")
1662 |     printCoefmat(x$coefficients, P.values=TRUE, has.Pvalue=TRUE)
1663 |     cat("\nResidual standard error:", format(signif(x$sigma,
1664 |                                                     4)), "on", x$df, "degrees of freedom")
1665 |     cat("\nWhite statistics for functional misspecification:", x$White.stat, "with Pvalue=", x$White.pvalue)
1666 |     cat("\nReject the null of NO misspecification:", x$White.test)
1667 |     cat("\n")
1668 | }
1669 | 
1670 | 
1671 | 
1672 | ### print wfedid class
1673 | 
1674 | print.wfedid <- function(x,...){
1675 |     cat("Call:\n")
1676 |     print(x$call)
1677 |     cat("\nCoefficients:\n")
1678 |     print(x$coefficients)
1679 |     cat("\nStd.Err:\n")
1680 |     print(x$se)
1681 | }
1682 | 
1683 | summary.wfedid <- function(object, signif.stars = getOption("show.signif.stars"),...){
1684 |     coef <- object$coefficients
1685 |     se <- object$se
1686 |     sigma <- object$sigma
1687 |     df <- object$df
1688 |     tval <- coef(object) / se
1689 |     TAB <- cbind(Estimate = coef,
1690 |                  Std.Error = se,
1691 |                  t.value = tval,
1692 |                  p.value = 2*pt(-abs(tval), df = object$df))
1693 |     res <- list(call = object$call,
1694 |                 coefficients = TAB,
1695 |                 sigma = object$sigma,
1696 |                 df = object$df,
1697 |                 W = object$W,
1698 |                 Num.nonzero = object$Num.nonzero,
1699 |                 residuals = object$residuals,
1700 |                 method = object$method,
1701 |                 causal = object$causal,
1702 |                 estimator = object$est,
1703 |                 std.error = object$std.error,
1704 |                 White.pvalue = object$White.pvalue,
1705 |                 White.alpha = object$White.alpha,
1706 |                 White.stat = object$White.stat,
1707 |                 White.test = object$White.test,
1708 |                 Y = object$y,
1709 |                 X = object$x,
1710 |                 Y.wdm = object$Y.wdm,
1711 |                 X.wdm = object$X.wdm
1712 |                 )
1713 |     class(res) <- "summary.wfedid"
1714 |     res
1715 | }
1716 | 
1717 | print.summary.wfedid <- function(x, ...){
1718 |     cat("\nMethod:", x$method, "Fixed Effects\n")
1719 |     cat("\nQuantity of Interest:", x$causal)
1720 |     cat("\nEstimator:", x$estimator)
1721 |     cat("\nStandard Error:", x$std.error)
1722 |     cat("\n")
1723 |     cat("\n")
1724 |     cat("Call:\n")
1725 |     cat("Coefficients:\n")
1726 |     print(x$call)
1727 |     cat("\n")
1728 |     colnames(x$coefficients) <- c("Estimate", "Std.Err", "t value", "Pr(>|t|)")
1729 |     printCoefmat(x$coefficients, P.values=TRUE, has.Pvalue=TRUE)
1730 |     cat("\nResidual standard error:", format(signif(x$sigma,
1731 |                                                     4)), "on", x$df, "degrees of freedom")
1732 |     if (!is.null(x$White.stat)){
1733 |         cat("\nWhite statistics for functional misspecification:", x$White.stat, "with Pvalue=", x$White.pvalue)
1734 |         cat("\nReject the null of NO misspecification:", x$White.test)
1735 |     }
1736 |     cat("\n")
1737 | }
1738 | 
1739 | 
1740 | 
1741 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # wfe: Weighted Linear Fixed Effects Regression Models for Causal Inference [![Build Status](https://travis-ci.org/insongkim/wfe.svg?branch=master)](https://travis-ci.org/insongkim/wfe) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/wfe)](https://cran.r-project.org/package=wfe) ![CRAN downloads](http://cranlogs.r-pkg.org/badges/grand-total/wfe)
 2 | 
 3 | This R package provides a computationally efficient way of fitting
 4 | weighted linear fixed effects estimators for causal inference with
 5 | various weighting schemes. Imai and Kim (2016) show that weighted
 6 | linear fixed effects estimators can be used to estimate the average
 7 | treatment effects under different identification strategies. This
 8 | includes stratified randomized experiments, matching and
 9 | stratification for observational studies, first differencing, and
10 | difference-in-differences. The package also provides various robust
11 | standard errors and a specification test for standard linear fixed
12 | effects estimators.
13 | 


--------------------------------------------------------------------------------
/man/pwfe.Rd:
--------------------------------------------------------------------------------
  1 | \name{pwfe}
  2 | \alias{pwfe}
  3 | \title{Fitting the Weighted Fixed Effects Model with Propensity Score Weighting}
  4 | \description{
  5 |   \code{pwfe} is used to fit weighted fixed effects model for causal
  6 |   inference after transforming outcome variable based on estimated
  7 |   propensity score. \code{pwfe} also derives the regression weights for
  8 |   different causal quantity of interest.
  9 | }
 10 | \usage{
 11 | pwfe(formula, treat = "treat.name", outcome, data, pscore = NULL,
 12 |      unit.index, time.index = NULL, method = "unit", within.unit = TRUE,
 13 |      qoi = c("ate", "att"), estimator = NULL, C.it = NULL,
 14 |      White = TRUE, White.alpha = 0.05,
 15 |      hetero.se = TRUE, auto.se = TRUE, unbiased.se = FALSE,
 16 |      verbose = TRUE)
 17 | }
 18 | 
 19 | \arguments{
 20 | 
 21 |   \item{formula}{a symbolic description of the model for estimating
 22 |     propensity score. The formula should not include dummmies for
 23 |     fixed effects. The details of model specifications are given
 24 |     under `Details'.
 25 |   }
 26 |   
 27 |   \item{treat}{a character string indicating the name of treatment
 28 |     variable used in the models. The treatment should be binary
 29 |     indicator (integer with 0 for the control group and 1 for the
 30 |     treatment group).
 31 |   }
 32 |   
 33 |   \item{outcome}{a character string indicating the name of outcome
 34 |     variable.
 35 |   }
 36 | 
 37 |   \item{data}{data frame containing the variables in the model.
 38 |   }
 39 | 
 40 |   \item{pscore}{an optional character string indicating the name of
 41 |   estimated propensity score. Note that pre-specified propensity score
 42 |   should be bounded away from zero and one.
 43 |   }
 44 | 
 45 |   \item{unit.index}{a character string indicating the name of unit
 46 |     variable used in the models. The index of unit should be factor.
 47 |   }
 48 |   \item{time.index}{a character string indicating the name of time
 49 |     variable used in the models. The index of time should be factor.
 50 |   }
 51 |   \item{method}{method for weighted fixed effects regression, either
 52 |     \code{unit} for unit fixed effects; \code{time} for time fixed
 53 |     effects. The default is \code{unit}.
 54 |   }
 55 | 
 56 |   \item{within.unit}{a logical value indicating whether propensity score
 57 |     is estimated within unit. The default is \code{TRUE}. 
 58 |   }
 59 | 
 60 |   \item{qoi}{one of \code{"ate"} or \code{"att"}. The default is
 61 |     \code{"ate"}. \code{"fd"} and \code{"did"} are not compatible with
 62 |     \code{pwfe}.}
 63 | 
 64 |   \item{estimator}{an optional character string \code{"fd"} indicating
 65 |     whether the first-difference estimator will be used.}
 66 | 
 67 |   \item{C.it}{an optional non-negative numeric vector specifying
 68 |     relative weights for each unit of analysis.
 69 |   }
 70 | 
 71 |   \item{White}{a logical value indicating whether White misspecification
 72 |     statistics should be calculated. The default is \code{TRUE}.
 73 |   }
 74 |  
 75 |   \item{White.alpha}{level of functional specification test. See White
 76 |     (1980) and Imai . The default is \code{0.05}.
 77 |   }
 78 |   
 79 |   \item{hetero.se}{a logical value indicating whether heteroskedasticity
 80 |     across units is allowed in calculating standard errors. The default
 81 |     is \code{TRUE}.
 82 |   }
 83 | 
 84 |   \item{auto.se}{a logical value indicating whether arbitrary
 85 |     autocorrelation is allowed in calculating standard errors. The
 86 |     default is \code{TRUE}.
 87 |   }
 88 | 
 89 |   \item{unbiased.se}{logical. If \code{TRUE}, bias-asjusted
 90 |     heteroskedasticity-robust standard errors are used. See Stock and
 91 |     Watson (2008). Should be used only for balanced panel. The default
 92 |     is \code{FALSE}.
 93 |   }
 94 | 
 95 |   \item{verbose}{logical. If \code{TRUE}, helpful messages along with
 96 |     a progress report of the weight calculation are printed
 97 |     on the screen. The default is \code{TRUE}.
 98 |   }
 99 |   
100 | }
101 | \value{
102 |  \code{pwfe} returns an object of class "pwfe", a list that contains the
103 |  components listed below.
104 | 
105 | 
106 |  The function \code{summary} (i.e., \code{summary.pwfe}) can be used to
107 |  obtain a table of the results.
108 | 
109 |  \item{coefficients}{a named vector of coefficients}
110 |  \item{residuals}{the residuals, that is respons minus fitted values}
111 |  \item{df}{the degree of freedom}
112 |  \item{W}{weight matrix calculated from the model. Row and column
113 |    indices can be found from unit.name, time.name.}
114 |  \item{call}{the matched call}
115 |  \item{causal}{causal quantity of interest}
116 |  \item{estimator}{the estimating method}
117 |  \item{unit.name}{a vector containing unique unit names}
118 |  \item{unit.index}{a vector containing unique unit index number}
119 |  \item{time.name}{a vector containing unique time names}
120 |  \item{time.index}{a vector containing unique time index number}
121 |  \item{method}{call of the method used}
122 |  \item{vcov}{the variance covariance matrix}
123 |  \item{White.alpha}{the alpha level for White specification test}
124 |  \item{White.pvalue}{the p-value for White specification test}
125 |  \item{White.stat}{the White statistics}
126 |  \item{x}{the design matrix}
127 |  \item{y}{the response vector}
128 |  \item{mf}{the model frame}
129 |   
130 | }
131 | \details{
132 |   
133 |   To fit the weighted unit (time) fixed effects model with propensity
134 |   score weighting, use the syntax for the formula, \code{~ x1 + x2},
135 |   where \code{x1} and \code{x2} are unit (time) varying
136 |   covariates.
137 | 
138 |   One can provide his/her own estimated \code{pscore} which can be used
139 |   to transform the outcome varialbe. If so, one does not need to specify
140 |   \code{formula}.
141 | 
142 |   If \code{pscore} is not provided, \code{bayesglm} will be used to
143 |   estimate propensity scores. If \code{within.unit = TRUE}, propensity
144 |   score will be separately estimated within time (unit) when
145 |   \code{method} is \code{unit} (\code{time}). Otherwise, propensity
146 |   score will be estimated on entire data at once.
147 | 
148 |   The estimated propensity scores will be used to transform the
149 |   \code{outcome} variable as described in Imai and Kim (2018).
150 | 
151 |   \code{pwfe} calculates weights based on different underlying causal
152 |   quantity of interest: Average Treatment Effect (\code{qoi = "ate"}) or
153 |   Average Treatment Effect for the Treated (\code{qoi = "att"}).
154 | 
155 |   One can further set estimating methods: First-Difference
156 |   (\code{estimator ="fd"}) or Difference-in-differences (\code{estimator
157 |   = "did"}).
158 | 
159 |   To specify different ex-ante weights for each unit of analysis, use
160 |   non-negative weights \code{C.it}. For instance, using the survey
161 |   weights for \code{C.it} enables the estimation fo the average
162 |   treatement effect for the target population.
163 |   
164 | 
165 | }
166 | 
167 | \references{ Imai, Kosuke and In Song Kim. (2018) ``When Should We Use
168 | Unit Fixed Effects Regression Models for Causal Inference with
169 | Longitudinal Data?" American Journal of Political Science,
170 | Forthcoming.
171 | 
172 |   Stock, James and Mark Watson. (2008) ``Heteroskedasticity-Robust
173 |   Standard Errors for Fixed Effect Panel Data Regression'' Econometrica,
174 |   76, 1.
175 |   
176 |   White, Halbert. (1980) `Using Least Squares to Approximate Unknown
177 |   Regression Functions.''  International Economic Review, 21, 1,
178 |   149--170.
179 | 
180 | }
181 | \seealso{
182 |   \code{wfe} for fitting weighted fixed effect models.
183 | }
184 | \examples{
185 | ### NOTE: this example illustrates the use of wfe function with randomly
186 | ### generated panel data with arbitrary number of units and time.
187 | 
188 | ## generate panel data with number of units = N, number of time = Time
189 | \dontrun{
190 | 
191 | N <- 10 # number of distinct units
192 | Time <- 15 # number of distinct time
193 | 
194 | ## generate treatment variable
195 | treat <- matrix(rbinom(N*Time, size = 1, 0.25), ncol = N)
196 | ## make sure at least one observation is treated for each unit
197 | while ((sum(apply(treat, 2, mean) == 0) > 0) | (sum(apply(treat, 2, mean) == 1) > 0) |
198 |        (sum(apply(treat, 1, mean) == 0) > 0) | (sum(apply(treat, 1, mean) == 1) > 0)) {
199 |   treat <- matrix(rbinom(N*Time, size = 1, 0.25), ncol = N)
200 | }
201 | treat.vec <- c(treat)
202 | 
203 | ## unit fixed effects
204 | alphai <- rnorm(N, mean = apply(treat, 2, mean))
205 | 
206 | ## geneate two random covariates
207 | x1 <- matrix(rnorm(N*Time, 0.5,1), ncol=N)
208 | x2 <- matrix(rbeta(N*Time, 5,1), ncol=N)
209 | pscore <- matrix(runif(N*Time, 0,1), ncol=N)
210 | x1.vec <- c(x1)
211 | x2.vec <- c(x2)
212 | pscore <- c(pscore)
213 | 
214 | ## generate outcome variable
215 | y <- matrix(NA, ncol = N, nrow = Time)
216 | for (i in 1:N) {
217 |     y[, i] <- alphai[i] + treat[, i] + x1[,i] + x2[,i] + rnorm(Time)
218 | }
219 | y.vec <- c(y)
220 | 
221 | ## generate unit and time index
222 | unit.index <- rep(1:N, each = Time)
223 | time.index <- rep(1:Time, N)
224 | 
225 | Data.str <- as.data.frame(cbind(y.vec, treat.vec, unit.index, x1.vec, x2.vec))
226 | colnames(Data.str) <- c("y", "tr", "strata.id", "x1", "x2")
227 | 
228 | Data.obs <- as.data.frame(cbind(y.vec, treat.vec, unit.index, time.index, x1.vec, x2.vec, pscore))
229 | colnames(Data.obs) <- c("y", "tr", "unit", "time", "x1", "x2", "pscore")
230 | 
231 | 
232 | ############################################################
233 | # Example 1: Stratified Randomized Experiments
234 | ############################################################
235 | 
236 | ## run the weighted fixed effect regression with strata fixed effect.
237 | ## Note: the quantity of interest is Average Treatment Effect ("ate")
238 | ## and the standard errors allow heteroskedasticity and arbitrary
239 | ## autocorrelation.
240 | 
241 | 
242 | ### Average Treatment Effect
243 | ps.ate <- pwfe(~ x1+x2, treat = "tr", outcome = "y", data = Data.str, 
244 |                unit.index = "strata.id", method = "unit", within.unit = TRUE,
245 |                qoi = "ate", hetero.se=TRUE, auto.se=TRUE)
246 | ## summarize the results
247 | summary(ps.ate)
248 | 
249 | ### Average Treatment Effect for the Treated
250 | ps.att <- pwfe(~ x1+x2, treat = "tr", outcome = "y", data = Data.str, 
251 |                unit.index = "strata.id", method = "unit", within.unit = TRUE,
252 |                qoi = "att", hetero.se=TRUE, auto.se=TRUE)
253 | ## summarize the results
254 | summary(ps.att)
255 | 
256 | 
257 | ############################################################
258 | # Example 2: Observational Studies with Unit Fixed-effects
259 | ############################################################
260 | 
261 | ## run the weighted fixed effect regression with unit fixed effect.
262 | ## Note: the quantity of interest is Average Treatment Effect ("ate")
263 | ## and the standard errors allow heteroskedasticity and arbitrary
264 | ## autocorrelation.
265 | 
266 | ### Average Treatment Effect
267 | ps.obs <- pwfe(~ x1+x2, treat = "tr", outcome = "y", data = Data.obs, 
268 |                unit.index = "unit", time.index = "time",
269 |                method = "unit", within.unit = TRUE,
270 |                qoi = "ate", hetero.se=TRUE, auto.se=TRUE)
271 | 
272 | ## summarize the results
273 | summary(ps.obs)
274 | 
275 | ## extracting weigths
276 | summary(ps.obs)$Weights
277 | 
278 | ### Average Treatment Effect with First-difference
279 | 
280 | ps.fd <- pwfe(~ x1+x2, treat = "tr", outcome = "y", data = Data.obs,
281 |               unit.index = "unit", time.index = "time",
282 |               method = "unit", within.unit = TRUE,
283 |               qoi = "ate", estimator = "fd", hetero.se=TRUE, auto.se=TRUE)
284 | 
285 | ## summarize the results
286 | summary(ps.fd)
287 | 
288 | 
289 | ############################################################
290 | # Example 3: Estimation with pre-specified propensity score
291 | ############################################################
292 | 
293 | ### Average Treatment Effect with Pre-specified Propensity Scores
294 | 
295 | mod.ps <- pwfe(treat = "tr", outcome = "y", data = Data.obs, pscore = "pscore",
296 |                unit.index = "unit", time.index = "time",
297 |                method = "unit", within.unit = TRUE,
298 |                qoi = "ate", hetero.se=TRUE, auto.se=TRUE)
299 | 
300 | ## summarize the results
301 | summary(mod.ps)
302 | }
303 | 
304 | }
305 | 
306 | \author{In Song Kim, Massachusetts Institute of Technology, \email{insong@mit.edu}		
307 | 	and Kosuke Imai, Princeton University, \email{imai@harvard.edu}	}
308 | 
309 | \keyword{regression}
310 | 
311 | 
312 | 
313 | 


--------------------------------------------------------------------------------
/man/wfe.Rd:
--------------------------------------------------------------------------------
  1 | \name{wfe}
  2 | 
  3 | \alias{wfe}
  4 | \title{Fitting the Weighted Fixed Effects Model for Causal Inference}
  5 | 
  6 | \description{
  7 |   \code{wfe} is used to fit weighted fixed effects model for causal
  8 |   inference. \code{wfe} also derives the regression weights for
  9 |   different causal quantity of interest.
 10 | }
 11 | 
 12 | \usage{
 13 | wfe(formula, data, treat = "treat.name",
 14 |     unit.index, time.index = NULL, method = "unit",
 15 |     dyad1.index = NULL, dyad2.index = NULL,
 16 |     qoi = "ate", estimator = NULL, C.it = NULL,
 17 |     hetero.se = TRUE, auto.se = TRUE, 
 18 |     dyad.se = FALSE,
 19 |     White = TRUE, White.alpha = 0.05,
 20 |     verbose = TRUE, unbiased.se = FALSE, unweighted = FALSE,
 21 |     store.wdm = FALSE, maxdev.did = NULL,
 22 |     tol = sqrt(.Machine$double.eps))
 23 | }
 24 | 
 25 | 
 26 | 
 27 | \arguments{
 28 | 
 29 |   \item{formula}{a symbolic description of the model to be fitted. The
 30 |     formula should not include dummmies for fixed effects. The details
 31 |     of model specifications are given under `Details'.
 32 |   }
 33 |   \item{data}{data frame containing the variables in the model.
 34 |   }
 35 |   \item{treat}{a character string indicating the name of treatment
 36 |     variable used in the models. The treatment should be binary
 37 |     indicator (integer with 0 for the control group and 1 for the
 38 |     treatment group).
 39 |   }
 40 |   \item{unit.index}{a character string indicating the name of unit
 41 |     variable used in the models. The index of unit should be factor.
 42 |   }
 43 |   \item{time.index}{a character string indicating the name of time
 44 |     variable used in the models. The index of time should be factor.
 45 |   }
 46 |   
 47 |   \item{method}{method for weighted fixed effects regression, either
 48 |     \code{unit} for unit fixed effects; \code{time} for time fixed
 49 |     effects. The default is \code{unit}. For two-way weighted fixed
 50 |     effects regression models, set method to the default value
 51 |     \code{unit}.}
 52 | 
 53 |   \item{dyad1.index}{a character string indicating the variable name of first unit
 54 |     of a given dyad. The default is \code{NULL}.
 55 |     This is required to calculate robust standard errors with dyadic data.
 56 |   }
 57 | 
 58 |   \item{dyad2.index}{a character string indicating the variable name of second unit
 59 |     of a given dyad. The default is \code{NULL}.
 60 |     This is required to calculate robust standard errors with dyadic data.
 61 |   }
 62 | 
 63 |   \item{qoi}{one of \code{"ate"} or \code{"att"}. The default is
 64 |     \code{"ate"}. If set to \code{"att"} in implementing \code{"fd"}
 65 |     and \code{"did"} estimators, the comparison of the treated
 66 |     observation is restricted to the control observation from the
 67 |     previous time period but not with the control observation from the
 68 |     next time period.}
 69 | 
 70 |   \item{estimator}{an optional character string indicating the
 71 |   estimating method. One of \code{"fd"}, \code{"did"}, or
 72 |   \code{"Mdid"}. \code{"fd"} is for First-Difference
 73 |   Design. \code{"did"} is for multi-period Difference-in-Differences
 74 |   design. The default is \code{NULL}. Setting estimator to be
 75 |   \code{"Mdid"} implements the Difference-in-Differences design with
 76 |   Matching on the pretreatment outcome variables.}
 77 |   
 78 |   \item{C.it}{an optional non-negative numeric vector specifying
 79 |     relative weights for each unit of analysis. If not specified, the
 80 |     weights will be calculated based on the estimator and quantity of
 81 |     interest. }
 82 |   
 83 |   \item{hetero.se}{a logical value indicating whether heteroskedasticity
 84 |     across units is allowed in calculating standard errors. The default
 85 |     is \code{TRUE}.
 86 |   }
 87 | 
 88 |   \item{auto.se}{a logical value indicating whether arbitrary
 89 |     autocorrelation is allowed in calculating standard errors. The
 90 |     default is \code{TRUE}.
 91 |   }
 92 | 
 93 |   \item{dyad.se}{a logical value indicating whether correlations across dyads exist. The
 94 |     default is \code{FALSE}. 
 95 |   }
 96 | 
 97 |   \item{White}{a logical value indicating whether White misspecification
 98 |     statistics should be calculated. The default is \code{TRUE}.
 99 |   }
100 |   
101 |   \item{White.alpha}{level of functional specification test. See White
102 |     (1980) and Imai and Kim (2018). The default is \code{0.05}.
103 |   }
104 | 
105 |   \item{verbose}{logical. If \code{TRUE}, helpful messages along with
106 |     a progress report of the weight calculation are printed
107 |     on the screen. The default is \code{TRUE}.
108 |   }
109 | 
110 |   \item{unbiased.se}{logical. If \code{TRUE}, bias-asjusted
111 |     heteroskedasticity-robust standard errors are used. See Stock and
112 |     Watson (2008). Should be used only for balanced panel. The default
113 |     is \code{FALSE}.
114 |   }
115 |   
116 |   \item{unweighted}{logical. If \code{TRUE}, standard unweighted fixed
117 |     effects model is estimated. The default is \code{FALSE}. Note:
118 |     users do not need to specify \code{qoi} when
119 |     unweighted=\code{TRUE}. For standard two-way fixed effects model
120 |     (unit and time), set estimator=\code{"did"} and
121 |     unweighted=\code{"TRUE"}.}
122 | 
123 |   \item{store.wdm}{logical. If \code{TRUE}, weighted demeaned
124 |     dataframe will be stored. The default is \code{FALSE}.}
125 | 
126 |   \item{maxdev.did}{an optional positive numeric value specifying the
127 |   maximum deviation in pre-treatment outcome when \code{"Mdid"} is
128 |   implemented. The default is \code{NULL}, which implements
129 |   nearest-neighbor matching.}
130 | 
131 |   \item{tol}{a relative tolerance to detect zero singular values for
132 |     generalized inverse. The default is sqrt(.Machine$double.eps)
133 |   }
134 |   
135 | 
136 |   
137 | }
138 | 
139 | 
140 | \value{
141 | 
142 |  \code{wfe} returns an object of class "wfe", a list that contains the
143 |  components listed below.
144 | 
145 | 
146 |  The function \code{summary} (i.e., \code{summary.wfe}) can be used to
147 |  obtain a table of the results.
148 | 
149 |  \item{coefficients}{a named vector of coefficients}
150 |  \item{residuals}{the residuals, that is respons minus fitted values}
151 |  \item{df}{the degree of freedom}
152 |  \item{W}{a dataframe containing unit and time indices along with the
153 |  weights used for the observation. If method=\code{unit}, integer numbers
154 |  corresponding to the order of input data will be used for generating time index.}
155 |  \item{Num.nonzero}{Number of observations with non-zero weights}
156 |  \item{call}{the matched call}
157 |  \item{causal}{causal quantity of interest}
158 |  \item{estimator}{the estimating method}
159 |  \item{units}{a dataframe containing unit names used for \code{W}}
160 |  \item{times}{a dataframe containing time names used for \code{W}}
161 |  \item{method}{call of the method used}
162 |  \item{vcov}{the variance covariance matrix}
163 |  \item{White.alpha}{the alpha level for White specification test}
164 |  \item{White.pvalue}{the p-value for White specification test}
165 |  \item{White.stat}{the White statistics}
166 |  \item{X}{the design matrix}
167 |  \item{Y}{the response vector}
168 |  \item{X.wdm}{the demeaned design matrix}
169 |  \item{Y.wdm}{the demeaned response vector}
170 |  \item{mf}{the model frame where the last column is the weights used for the analysis}
171 | }
172 | 
173 | \details{
174 | 
175 |   To fit the weighted unit (time) fixed effects model, use the syntax
176 |   for the formula, \code{y ~ x1 + x2}, where \code{y} is a dependent
177 |   variable and \code{x1} and \code{x2} are unit (time) varying
178 |   covariates.
179 | 
180 |   \code{wfe} calculates weights based on different underlying causal
181 |   quantity of interest: Average Treatment Effect (\code{qoi = "ate"}) or
182 |   Average Treatment Effect for the Treated (\code{qoi = "att"}).
183 | 
184 |   One can further set estimating methods: First-Difference
185 |   (\code{estimator ="fd"}) or Difference-in-differences (\code{estimator
186 |     = "did"}). For the two-way fixed effects model, set \code{estimator
187 |     = "did"}
188 |   
189 |   To specify different ex-ante weights for each unit of analysis, use
190 |   non-negative weights \code{C.it}. For instance, using the survey
191 |   weights for \code{C.it} enables the estimation fo the average
192 |   treatement effect for the target population.
193 | 
194 |   An object of class "wfe" contains vectors of unique unit(time) names and
195 |   unique unit(time) indices. 
196 | 
197 | }
198 | 
199 | \references{Imai, Kosuke and In Song Kim. (2018) ``When Should We Use
200 | Unit Fixed Effects Regression Models for Causal Inference with
201 | Longitudinal Data?" American Journal of Political Science,
202 | Forthcoming.
203 | 
204 |   Aronow, Peter M., Cyrus Samii, and Valentina A. Assenova (2015) ``Cluster–robust
205 |   Variance Estimation for Dyadic Data." Political Analysis 23, no. 4, 564--577.
206 | 	
207 |   Stock, James and Mark Watson. (2008) ``Heteroskedasticity-Robust
208 |   Standard Errors for Fixed Effect Panel Data Regression'' Econometrica,
209 |   76, 1.
210 |   
211 |   White, Halbert. (1980) ``Using Least Squares to Approximate Unknown
212 |   Regression Functions.''  International Economic Review, 21, 1,
213 |   149--170.
214 | 
215 | }
216 | \seealso{
217 |   \code{pwfe} for fitting weighted fixed effects models with propensity
218 |   score weighting
219 | }
220 | \examples{
221 | ### NOTE: this example illustrates the use of wfe function with randomly
222 | ### generated panel data with arbitrary number of units and time.
223 | 
224 | ## generate panel data with number of units = N, number of time = Time
225 | N <- 10 # number of distinct units
226 | Time <- 15 # number of distinct time
227 | 
228 | ## treatment effect
229 | beta <- 1
230 | 
231 | ## generate treatment variable
232 | treat <- matrix(rbinom(N*Time, size = 1, 0.25), ncol = N)
233 | ## make sure at least one observation is treated for each unit
234 | while ((sum(apply(treat, 2, mean) == 0) > 0) | (sum(apply(treat, 2, mean) == 1) > 0) |
235 |        (sum(apply(treat, 1, mean) == 0) > 0) | (sum(apply(treat, 1, mean) == 1) > 0)) {
236 |   treat <- matrix(rbinom(N*Time, size = 1, 0.25), ncol = N)
237 | }
238 | treat.vec <- c(treat)
239 | 
240 | ## unit fixed effects
241 | alphai <- rnorm(N, mean = apply(treat, 2, mean))
242 | 
243 | ## geneate two random covariates
244 | x1 <- matrix(rnorm(N*Time, 0.5,1), ncol=N)
245 | x2 <- matrix(rbeta(N*Time, 5,1), ncol=N)
246 | x1.vec <- c(x1)
247 | x2.vec <- c(x2)
248 | ## generate outcome variable
249 | y <- matrix(NA, ncol = N, nrow = Time)
250 | for (i in 1:N) {
251 |     y[, i] <- alphai[i] + treat[, i] + x1[,i] + x2[,i] + rnorm(Time)
252 | }
253 | y.vec <- c(y)
254 | 
255 | ## generate unit and time index
256 | unit.index <- rep(1:N, each = Time)
257 | time.index <- rep(1:Time, N)
258 | 
259 | Data.str <- as.data.frame(cbind(y.vec, treat.vec, unit.index, x1.vec, x2.vec))
260 | colnames(Data.str) <- c("y", "tr", "strata.id", "x1", "x2")
261 | 
262 | Data.obs <- as.data.frame(cbind(y.vec, treat.vec, unit.index, time.index, x1.vec, x2.vec))
263 | colnames(Data.obs) <- c("y", "tr", "unit", "time", "x1", "x2")
264 | 
265 | ############################################################
266 | # Example 1: Stratified Randomized Experiments
267 | ############################################################
268 | 
269 | ## run the weighted fixed effect regression with strata fixed effect.
270 | ## Note: the quantity of interest is Average Treatment Effect ("ate")
271 | ## and the standard errors allow heteroskedasticity and arbitrary
272 | ## autocorrelation.
273 | 
274 | 
275 | ### Average Treatment Effect
276 | mod.ate <- wfe(y~ tr+x1+x2, data = Data.str, treat = "tr",
277 |                unit.index = "strata.id", method = "unit",
278 |                qoi = "ate", hetero.se=TRUE, auto.se=TRUE)
279 | ## summarize the results
280 | summary(mod.ate)
281 | 
282 | ### Average Treatment Effect for the Treated
283 | mod.att <- wfe(y~ tr+x1+x2, data = Data.str, treat = "tr",
284 |                unit.index = "strata.id", method = "unit",
285 |                qoi = "att", hetero.se=TRUE, auto.se=TRUE)
286 | ## summarize the results
287 | summary(mod.att)
288 | 
289 | 
290 | ############################################################
291 | # Example 2: Observational Studies with Unit Fixed-effects
292 | ############################################################
293 | 
294 | ## run the weighted fixed effect regression with unit fixed effect.
295 | ## Note: the quantity of interest is Average Treatment Effect ("ate")
296 | ## and the standard errors allow heteroskedasticity and arbitrary
297 | ## autocorrelation.
298 | 
299 | mod.obs <- wfe(y~ tr+x1+x2, data = Data.obs, treat = "tr",
300 |                unit.index = "unit", time.index = "time", method = "unit",
301 |                qoi = "ate", hetero.se=TRUE, auto.se=TRUE,
302 |                White = TRUE, White.alpha = 0.05)
303 | 
304 | ## summarize the results
305 | summary(mod.obs)
306 | 
307 | ## extracting weigths
308 | summary(mod.obs)$W
309 | 
310 | 
311 | \dontrun{
312 | ###################################################################
313 | # Example 3: Observational Studies with differences-in-differences
314 | ###################################################################
315 | 
316 | ## run difference-in-differences estimator.
317 | ## Note: the quantity of interest is Average Treatment Effect ("ate")
318 | ## and the standard errors allow heteroskedasticity and arbitrary
319 | ## autocorrelation.
320 | 
321 | mod.did <- wfe(y~ tr+x1+x2, data = Data.obs, treat = "tr",
322 |                unit.index = "unit", time.index = "time", method = "unit",
323 |                qoi = "ate", estimator ="did", hetero.se=TRUE, auto.se=TRUE,
324 |                White = TRUE, White.alpha = 0.05, verbose = TRUE)
325 | 
326 | ## summarize the results
327 | summary(mod.did)
328 | 
329 | ## extracting weigths
330 | summary(mod.did)$W
331 | 
332 | #########################################################################
333 | # Example 4: DID with Matching on Pre-treatment Outcomes
334 | #########################################################################
335 | 
336 | ## implements matching on pre-treatment outcomes where the maximum
337 | ## deviation is specified as 0.5 
338 | 
339 | mod.Mdid <- wfe(y~ tr+x1+x2, data = Data.obs, treat = "tr",
340 |                unit.index = "unit", time.index = "time", method = "unit",
341 |                qoi = "ate", estimator ="Mdid", hetero.se=TRUE, auto.se=TRUE,
342 |                White = TRUE, White.alpha = 0.05, maxdev.did = 0.5, verbose = TRUE)
343 | 
344 | ## summarize the results
345 | summary(mod.Mdid)
346 | 
347 | ## Note: setting the maximum deviation to infinity (or any value
348 | ## bigger than the maximum pair-wise difference in the outcome) will
349 | ## return the same result as Example 3.
350 | 
351 | dev <- 1000+max(Data.obs$y)-min(Data.obs$y)
352 | mod.did2 <- wfe(y~ tr+x1+x2, data = Data.obs, treat = "tr",
353 |                unit.index = "unit", time.index = "time", method = "unit",
354 |                qoi = "ate", estimator ="Mdid", hetero.se=TRUE, auto.se=TRUE,
355 |                White = TRUE, White.alpha = 0.05, maxdev.did = dev, verbose = TRUE)
356 | 
357 | ## summarize the results
358 | summary(mod.did2)
359 | mod.did2$coef[1] ==  mod.did$coef[1]
360 | }
361 | 
362 | }
363 | 
364 | 
365 | \author{In Song Kim, Massachusetts Institute of Technology, \email{insong@mit.edu}		
366 | 	and Kosuke Imai, Princeton University, \email{imai@harvard.edu}	}
367 | 
368 | \keyword{regression}
369 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | PKG_LIBS =  $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
2 | 
3 | 
4 | 


--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h> // for NULL
 2 | #include <R_ext/Rdynload.h>
 3 | 
 4 | /* FIXME: 
 5 |    Check these declarations against the C/Fortran source code.
 6 | */
 7 | 
 8 | /* .C calls */
 9 | extern void CalDID(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
10 | extern void comp_OmegaHAC(void *, void *, void *, void *, void *, void *, void *, void *, void *);
11 | extern void comp_OmegaHC(void *, void *, void *, void *, void *, void *, void *, void *, void *);
12 | extern void Demean(void *, void *, void *, void *, void *);
13 | extern void DemeanDID(void *, void *, void *, void *, void *, void *, void *, void *);
14 | extern void GenTime(void *, void *, void *, void *);
15 | extern void GenWeightsDID(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
16 | extern void GenWeightsFD(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
17 | extern void GenWeightsMDID(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
18 | extern void GenWeightsTime(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
19 | extern void GenWeightsUnit(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
20 | extern void Index(void *, void *, void *, void *, void *);
21 | extern void LamdaDID1(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
22 | extern void LamdaDID2(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
23 | extern void MDummy(void *, void *, void *, void *);
24 | extern void OmegaDiDHAC(void *, void *, void *, void *, void *, void *, void *, void *);
25 | extern void OmegaDiDHAC2(void *, void *, void *, void *, void *, void *, void *, void *);
26 | extern void OmegaHatHAC(void *, void *, void *, void *, void *, void *, void *);
27 | extern void OmegaHatHC(void *, void *, void *, void *, void *, void *, void *);
28 | extern void ProjectionM(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
29 | extern void Transform(void *, void *, void *, void *, void *);
30 | extern void TwayDemean(void *, void *, void *, void *, void *, void *, void *);
31 | extern void VectorizeC(void *, void *, void *, void *, void *, void *, void *);
32 | extern void WDemean(void *, void *, void *, void *, void *, void *);
33 | extern void WWDemean(void *, void *, void *, void *, void *, void *);
34 | extern void XWXiSum(void *, void *, void *, void *, void *, void *, void *);
35 | extern void XXiSum(void *, void *, void *, void *, void *, void *);
36 | 
37 | static const R_CMethodDef CEntries[] = {
38 |     {"CalDID",         (DL_FUNC) &CalDID,         12},
39 |     {"comp_OmegaHAC",  (DL_FUNC) &comp_OmegaHAC,   9},
40 |     {"comp_OmegaHC",   (DL_FUNC) &comp_OmegaHC,    9},
41 |     {"Demean",         (DL_FUNC) &Demean,          5},
42 |     {"DemeanDID",      (DL_FUNC) &DemeanDID,       8},
43 |     {"GenTime",        (DL_FUNC) &GenTime,         4},
44 |     {"GenWeightsDID",  (DL_FUNC) &GenWeightsDID,  11},
45 |     {"GenWeightsFD",   (DL_FUNC) &GenWeightsFD,   11},
46 |     {"GenWeightsMDID", (DL_FUNC) &GenWeightsMDID, 13},
47 |     {"GenWeightsTime", (DL_FUNC) &GenWeightsTime, 11},
48 |     {"GenWeightsUnit", (DL_FUNC) &GenWeightsUnit, 11},
49 |     {"Index",          (DL_FUNC) &Index,           5},
50 |     {"LamdaDID1",      (DL_FUNC) &LamdaDID1,      14},
51 |     {"LamdaDID2",      (DL_FUNC) &LamdaDID2,      14},
52 |     {"MDummy",         (DL_FUNC) &MDummy,          4},
53 |     {"OmegaDiDHAC",    (DL_FUNC) &OmegaDiDHAC,     8},
54 |     {"OmegaDiDHAC2",   (DL_FUNC) &OmegaDiDHAC2,    8},
55 |     {"OmegaHatHAC",    (DL_FUNC) &OmegaHatHAC,     7},
56 |     {"OmegaHatHC",     (DL_FUNC) &OmegaHatHC,      7},
57 |     {"ProjectionM",    (DL_FUNC) &ProjectionM,    17},
58 |     {"Transform",      (DL_FUNC) &Transform,       5},
59 |     {"TwayDemean",     (DL_FUNC) &TwayDemean,      7},
60 |     {"VectorizeC",     (DL_FUNC) &VectorizeC,      7},
61 |     {"WDemean",        (DL_FUNC) &WDemean,         6},
62 |     {"WWDemean",       (DL_FUNC) &WWDemean,        6},
63 |     {"XWXiSum",        (DL_FUNC) &XWXiSum,         7},
64 |     {"XXiSum",         (DL_FUNC) &XXiSum,          6},
65 |     {NULL, NULL, 0}
66 | };
67 | 
68 | void R_init_wfe(DllInfo *dll)
69 | {
70 |     R_registerRoutines(dll, CEntries, NULL, NULL, NULL);
71 |     R_useDynamicSymbols(dll, FALSE);
72 | }
73 | 


--------------------------------------------------------------------------------
/src/vector.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <assert.h>
  3 | #include <stdio.h>
  4 | #include <R_ext/Utils.h>
  5 | #include <R.h>
  6 | #include "vector.h"
  7 | #include <complex.h>
  8 | #include <R_ext/Complex.h>
  9 | 
 10 | Rcomplex* compArray(int num) {
 11 |   
 12 |   Rcomplex *cArray = (Rcomplex *)malloc(num * sizeof(Rcomplex));
 13 |   if (!cArray)
 14 |     error("Out of memory error in compArray\n");
 15 |   return cArray;
 16 | }
 17 | 
 18 | 
 19 | Rcomplex** compMatrix(int row, int col) {
 20 | 
 21 |   int i;
 22 |   Rcomplex **cMatrix = (Rcomplex **)malloc(row * sizeof(Rcomplex *));
 23 |   if (!cMatrix)
 24 |     error("Out of memory error in compMatrix\n");
 25 |   for (i = 0; i < row; i++) {
 26 |      cMatrix[i] = (Rcomplex *)calloc(col, sizeof(Rcomplex));
 27 |     if (!cMatrix[i])
 28 |       error("Out of memory error in compMatrix\n");
 29 |   }
 30 |   return cMatrix;
 31 | }
 32 | 
 33 | void PcompMatrix(Rcomplex **cmatrix, int row, int col) {
 34 |    int i, j;
 35 |    for (i = 0; i < row; i++) {
 36 |       for (j = 0; j < col; j++) {
 37 | 	 if (cmatrix[i][j].i >= 0) {
 38 | 	    Rprintf("%6.4f+%6.4fi ", cmatrix[i][j].r, cmatrix[i][j].i);
 39 | 	 } 
 40 | 	 if (cmatrix[i][j].i < 0) {
 41 | 	    Rprintf("%6.4f%6.4fi ", cmatrix[i][j].r, cmatrix[i][j].i);
 42 | 	 } 
 43 |       }
 44 |       Rprintf("\n");      
 45 |    }
 46 | }
 47 | 
 48 | int* intArray(int num) {
 49 |   int *iArray = (int *)malloc(num * sizeof(int));
 50 |   if (!iArray)
 51 |     error("Out of memory error in intArray\n");
 52 |   return iArray;
 53 | }
 54 | 
 55 | void PintArray(int *ivector, int length) {
 56 |   int i;
 57 |   for (i = 0; i < length; i++)
 58 |     Rprintf("%5d\n", ivector[i]);
 59 | }
 60 | 
 61 | int** intMatrix(int row, int col) {
 62 |   int i;
 63 |   int **iMatrix = (int **)malloc(row * sizeof(int *));
 64 |   if (!iMatrix) 
 65 |     error("Out of memory error in intMatrix\n");
 66 |   for (i = 0; i < row; i++) {
 67 |     iMatrix[i] = (int *)malloc(col *  sizeof(int));
 68 |     if (!iMatrix[i]) 
 69 |       error("Out of memory error in intMatrix\n");
 70 |   }
 71 |   return iMatrix;
 72 | }
 73 | 
 74 | void PintMatrix(int **imatrix, int row, int col) {
 75 |   int i, j;
 76 |   for (i = 0; i < row; i++) {
 77 |     for (j = 0; j < col; j++)
 78 |       Rprintf("%5d", imatrix[i][j]);
 79 |     Rprintf("\n");
 80 |   }
 81 | }
 82 | 
 83 | 
 84 | double* doubleArray(int num) {
 85 |   double *dArray = (double *)malloc(num * sizeof(double));
 86 |   if (!dArray)
 87 |     error("Out of memory error in doubleArray\n");
 88 |   return dArray;
 89 | }
 90 | 
 91 | void PdoubleArray(double *dvector, int length) {
 92 |   int i;
 93 |   for (i = 0; i < length; i++)
 94 |     Rprintf("%14g\n", dvector[i]);
 95 | }
 96 | 
 97 | double** doubleMatrix(int row, int col) {
 98 |   int i;
 99 |   double **dMatrix = (double **)malloc((size_t)(row * sizeof(double *)));
100 |   if (!dMatrix) 
101 |     error("Out of memory error in doubleMatrix\n");
102 |   for (i = 0; i < row; i++) {
103 |     dMatrix[i] = (double *)malloc((size_t)(col * sizeof(double)));
104 |     if (!dMatrix[i])
105 |       error("Out of memory error in doubleMatrix\n");
106 |   }
107 |   return dMatrix;
108 | }
109 | 
110 | void PdoubleMatrix(double **dmatrix, int row, int col) {
111 |   int i, j;
112 |   for (i = 0; i < row; i++) {
113 |     for (j = 0; j < col; j++)
114 |       Rprintf("%14g", dmatrix[i][j]);
115 |     Rprintf("\n");
116 |   }
117 | }
118 | 
119 | double*** doubleMatrix3D(int x, int y, int z) {
120 |   int i;
121 |   double ***dM3 = (double ***)malloc(x * sizeof(double **));
122 |   if (!dM3) 
123 |     error("Out of memory error in doubleMatrix3D\n");
124 |   for (i = 0; i < x; i++) 
125 |     dM3[i] = doubleMatrix(y, z);
126 |   return dM3;
127 | }
128 | 
129 | void PdoubleMatrix3D(double ***dmatrix3D, int x, int y, int z) {
130 |   int i, j, k;
131 |   for (i = 0; i < x; i++) {
132 |     Rprintf("First dimension = %5d\n", i);
133 |     for (j = 0; j < y; j++) {
134 |       for (k = 0; k < z; k++)
135 | 	Rprintf("%14g", dmatrix3D[i][j][k]);
136 |       Rprintf("\n");
137 |     }
138 |   }
139 | }
140 | 
141 | double**** doubleMatrix4D(int x, int y, int z, int zz) {
142 |   int i;
143 |   double ****dM4 = (double ****)malloc(x * sizeof(double ***));
144 |   if (!dM4) 
145 |     error("Out of memory error in doubleMatrix4D\n");
146 |   for (i = 0; i < x; i++) 
147 |     dM4[i] = doubleMatrix3D(y, z, zz);
148 |   return dM4;
149 | }
150 | 
151 | long* longArray(int num) {
152 |   long *lArray = (long *)malloc(num * sizeof(long));
153 |   if (!lArray)
154 |     error("Out of memory error in longArray\n");
155 |   return lArray;
156 | }
157 | 
158 | void FreeMatrix(double **Matrix, int row) {
159 |   int i;
160 |   for (i = 0; i < row; i++)
161 |     free(Matrix[i]);
162 |   free(Matrix);
163 | }
164 | 
165 | void FreeintMatrix(int **Matrix, int row) {
166 |   int i;
167 |   for (i = 0; i < row; i++)
168 |     free(Matrix[i]);
169 |   free(Matrix);
170 | }
171 | 
172 | void FreecompMatrix(Rcomplex **Matrix, int row) {
173 |   int i; 
174 |   for (i = 0; i < row; i++)
175 |     free(Matrix[i]);
176 |   free(Matrix);
177 | }
178 | 
179 | 
180 | void Free3DMatrix(double ***Matrix, int index, int row) {
181 |   int i;
182 |   for (i = 0; i < index; i++)
183 |     FreeMatrix(Matrix[i], row);
184 |   free(Matrix);
185 | }
186 | 
187 | void Free4DMatrix(double ****Matrix, int index, int index1, int row) {
188 |   int i;
189 |   for (i = 0; i < index; i++)
190 |     Free3DMatrix(Matrix[i], index1, row);
191 |   free(Matrix);
192 | }
193 | 		
194 | 	
195 | 		
196 | 			
197 | 


--------------------------------------------------------------------------------
/src/vector.h:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <assert.h>
 3 | 
 4 | Rcomplex *compArray(int num);
 5 | Rcomplex **compMatrix(int row, int col);
 6 | 
 7 | int *intArray(int num);
 8 | void PintArray(int *ivector, int length);
 9 | int **intMatrix(int row, int col);
10 | void PintMatrix(int **imatrix, int row, int col);
11 | 
12 | double *doubleArray(int num);
13 | void PdoubleArray(double *dvector, int length);
14 | double **doubleMatrix(int row, int col);
15 | void PdoubleMatrix(double **dmatrix, int row, int col);
16 | 
17 | double ***doubleMatrix3D(int x, int y, int z);
18 | void PdoubleMatrix3D(double ***dmatrix3D, int x, int y, int z);
19 | 
20 | double ****doubleMatrix4D(int x, int y, int z, int zz);
21 | 
22 | long *longArray(int num);
23 | 
24 | void FreeMatrix(double **Matrix, int row);
25 | void FreeintMatrix(int **Matrix, int row);
26 | void FreecompMatrix(Rcomplex **Matrix, int row);
27 | void Free3DMatrix(double ***Matrix, int index, int row);
28 | void Free4DMatrix(double ****Matrix, int index, int index1, int row);
29 | 


--------------------------------------------------------------------------------
/src/wfe.h:
--------------------------------------------------------------------------------
 1 | /* This file was automatically generated.  Do not edit! */
 2 | void LamdaDID2(int *len_Xtrow,int *len_Xhrow,int *Tunit_index,int *len_uniq_Tu_index,int *Hunit_index,int *len_uniq_Hu_index,double *Xtilde,int *len_Xtcol,double *utilde,double *Xhat,int *len_Xhcol,double *uhat,double *W,double *LamdaDID2);
 3 | void LamdaDID1(int *len_Xtrow,int *len_Xhrow,int *Tunit_index,int *len_uniq_Tu_index,int *Hunit_index,int *len_uniq_Hu_index,double *Xtilde,int *len_Xtcol,double *utilde,double *Xhat,int *len_Xhcol,double *uhat,double *W,double *LamdaDID1);
 4 | void OmegaDiDHC(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *W,double *Omega_DiD_HC);
 5 | void OmegaDiDHAC(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *W,double *Omega_DiD_HAC);
 6 | void LamdaHat(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *Xhat,double *uhat,double *Lamda_hat);
 7 | void OmegaHatHC(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *Omega_hat_HC);
 8 | void OmegaHatHAC(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *Omega_hat_HAC);
 9 | void XWXiSum(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *weights,double *result);
10 | void XXiSum(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *result);
11 | void MDummy(int *index,int *len_index,int *len_data,int *dummy);
12 | void GenWeightsDID(int *unit_index,int *time_index,int *tr,int *C_it,int *len_data,int *len_u_index,int *len_t_index,int *ate,int *att,int *verbose,double *weightdid);
13 | void CalDID(int *unit_index,int *time_index,int *tr,int *C_it,double *y,int *len_data,int *len_u_index,int *len_t_index,int *ate,int *att,int *verbose,double *did);
14 | int t_t1_same(int *unit_index,int *time_index,int *len_u_index,int *len_t_index,int *len_data,int *tr,int **same);
15 | int is_t_t1_same(int *u_i,int *t_i,int i,int j,int *tr,int size);
16 | void GenWeightsFD(int *unit_index,int *time_index,int *tr,int *C_it,int *len_data,int *len_u_index,int *len_t_index,int *ate,int *att,int *verbose,double *weightfd);
17 | void GenWeightsTime(int *time_index,int *unit_index,int *tr,int *C_it,int *len_data,int *len_t_index,int *len_u_index,int *ate,int *att,int *verbose,double *weight);
18 | void GenWeightsUnit(int *unit_index,int *time_index,int *tr,int *C_it,int *len_data,int *len_u_index,int *len_t_index,int *ate,int *att,int *verbose,double *weight);
19 | int is_time_index_exist(int *u_i,int *t_i,int i,int j,int size);
20 | int is_index_exist(int *unit_index,int *time_index,int *len_u_index,int *len_t_index,int *len_data,int **exist);
21 | void TwayDemean(double *var,int *unit_index,int *time_index,int *len_u_index,int *len_t_index,int *len_data,double *TwayDemean);
22 | void WWDemean(double *var,double *weight,int *index,int *len_index,int *len_data,double *WWdemean);
23 | void WDemean(double *var,double *weight,int *index,int *len_index,int *len_data,double *Wdemean);
24 | void Demean(double *var,int *index,int *len_index,int *len_data,double *demean);
25 | void GenTime(int *unit_index,int *len_data,int *len_u_index,double *time_index);
26 | void Transform(double *y,int *n,int *treat,double *pscore,double *ytrans);
27 | void Vectorize(double *Wvec,int *nrow,int *ncol,int *time_index,int *dyad_index,int *n_obs,double *results);
28 | void Index(int *index,int *uniq_index,int *len_u_index,int *len_data,int *result);
29 | 


--------------------------------------------------------------------------------