├── .gitignore ├── COPYRIGHTS.txt ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── causalinference ├── __init__.py ├── causal.py ├── core │ ├── __init__.py │ ├── data.py │ ├── propensity.py │ ├── strata.py │ └── summary.py ├── estimators │ ├── __init__.py │ ├── base.py │ ├── blocking.py │ ├── matching.py │ ├── ols.py │ └── weighting.py └── utils │ ├── __init__.py │ ├── lalonde_data.txt │ ├── tools.py │ └── vignette_data.txt ├── docs ├── Makefile ├── _templates │ └── layout.html ├── causalinference.core.rst ├── causalinference.estimators.rst ├── causalinference.rst ├── causalinference.utils.rst ├── conf.py ├── favicon.png ├── index.rst └── tex │ ├── references.bib │ ├── vignette.pdf │ └── vignette.tex ├── setup.py └── tests ├── test_blocking.py ├── test_causal.py ├── test_data.py ├── test_matching.py ├── test_ols.py ├── test_propensity.py ├── test_propensityselect.py ├── test_summary.py ├── test_tools.py ├── test_weighting.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | *.swp 4 | *.log 5 | *.aux 6 | *.bbl 7 | *.blg 8 | *.bak 9 | *.egg-info 10 | *.R 11 | dist 12 | interactive.py 13 | docs/_build 14 | -------------------------------------------------------------------------------- /COPYRIGHTS.txt: -------------------------------------------------------------------------------- 1 | The license of causalinference can be found in LICENSE.txt 2 | 3 | causalinference contains code or derivative code from several other 4 | packages. Collected below are the copyright statements of code from 5 | other packages. 6 | 7 | numpy 8 | --------------------------------------------------------------------------- 9 | Copyright (c) 2005-2009, NumPy Developers. 10 | All rights reserved. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are 14 | met: 15 | 16 | * Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above 20 | copyright notice, this list of conditions and the following 21 | disclaimer in the documentation and/or other materials provided 22 | with the distribution. 23 | 24 | * Neither the name of the NumPy Developers nor the names of any 25 | contributors may be used to endorse or promote products derived 26 | from this software without specific prior written permission. 27 | 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
39 | --------------------------------------------------------------------------- 40 | 41 | scipy 42 | --------------------------------------------------------------------------- 43 | Copyright (c) 2001, 2002 Enthought, Inc. 44 | All rights reserved. 45 | 46 | Copyright (c) 2003-2009 SciPy Developers. 47 | All rights reserved. 48 | 49 | Redistribution and use in source and binary forms, with or without 50 | modification, are permitted provided that the following conditions are met: 51 | 52 | a. Redistributions of source code must retain the above copyright notice, 53 | this list of conditions and the following disclaimer. 54 | b. Redistributions in binary form must reproduce the above copyright 55 | notice, this list of conditions and the following disclaimer in the 56 | documentation and/or other materials provided with the distribution. 57 | c. Neither the name of the Enthought nor the names of its contributors 58 | may be used to endorse or promote products derived from this software 59 | without specific prior written permission. 60 | 61 | 62 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 66 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 68 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 69 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 72 | DAMAGE. 73 | --------------------------------------------------------------------------- 74 | 75 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2015, Laurence Wong 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. Neither the name of the copyright holder nor the names of its 13 | contributors may be used to endorse or promote products derived from 14 | this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 27 | 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include *.py 2 | include *.txt 3 | include MANIFEST.in 4 | include README.rst 5 | include causalinference/utils/*.txt 6 | 7 | exclude interactive.py 8 | global-exclude *.swp *.pyc 9 | 10 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Causal Inference in Python 2 | ========================== 3 | 4 | *Causal Inference in Python*, or *Causalinference* in short, is a software package that implements various statistical and econometric methods used in the field variously known as Causal Inference, Program Evaluation, or Treatment Effect Analysis. 5 | 6 | Work on *Causalinference* started in 2014 by Laurence Wong as a personal side project. It is distributed under the 3-Clause BSD license. 7 | 8 | Important Links 9 | =============== 10 | 11 | The official website for *Causalinference* is 12 | 13 | https://causalinferenceinpython.org 14 | 15 | The most current development version is hosted on GitHub at 16 | 17 | https://github.com/laurencium/causalinference 18 | 19 | Package source and binary distribution files are available from PyPi at 20 | 21 | https://pypi.python.org/pypi/causalinference 22 | 23 | For an overview of the main features and uses of *Causalinference*, please refer to 24 | 25 | https://github.com/laurencium/causalinference/blob/master/docs/tex/vignette.pdf 26 | 27 | A blog dedicated to providing a more detailed walkthrough of *Causalinference* and the econometric theory behind it can be found at 28 | 29 | https://laurencewong.com/software/ 30 | 31 | Main Features 32 | ============= 33 | 34 | * Assessment of overlap in covariate distributions 35 | * Estimation of propensity score 36 | * Improvement of covariate balance through trimming 37 | * Subclassification on propensity score 38 | * Estimation of treatment effects via matching, blocking, weighting, and least squares 39 | 40 | Dependencies 41 | ============ 42 | 43 | * NumPy: 1.8.2 or higher 44 | * SciPy: 0.13.3 or higher 45 | 46 | Installation 47 | ============ 48 | 49 | *Causalinference* can be installed using ``pip``: :: 50 | 51 | $ pip install causalinference 52 | 53 | For help on setting up Pip, NumPy, and SciPy on Macs, check out this excellent `guide `_. 54 | 55 | Minimal Example 56 | =============== 57 | 58 | The following illustrates how to create an instance of CausalModel: :: 59 | 60 | >>> from causalinference import CausalModel 61 | >>> from causalinference.utils import random_data 62 | >>> Y, D, X = random_data() 63 | >>> causal = CausalModel(Y, D, X) 64 | 65 | Invoking ``help`` on ``causal`` at this point should return a comprehensive listing of all the causal analysis tools available in *Causalinference*. 
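The methods defined in ``causalinference/causal.py`` below (``est_propensity_s``, ``trim_s``, ``stratify_s``, ``est_via_blocking``, ``est_via_matching``) suggest a typical analysis sequence. The following continuation of the minimal example is an illustrative sketch assembled from those method signatures rather than an official tutorial, and the printed summaries are omitted: ::

    >>> print(causal.summary_stats)             # covariate balance diagnostics
    >>> causal.est_propensity_s()               # propensity score with covariate selection
    >>> causal.trim_s()                         # drop units with extreme scores to improve overlap
    >>> causal.stratify_s()                     # data-driven propensity-score blocks
    >>> causal.est_via_blocking()               # within-block regression estimates
    >>> causal.est_via_matching(bias_adj=True)  # bias-corrected nearest-neighbor matching
    >>> print(causal.estimates)

Each estimation method stores its results in ``causal.estimates``, keyed by estimator name (``'blocking'``, ``'matching'``, and so on), as implemented in ``causalinference/estimators/base.py``.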
66 | 67 | -------------------------------------------------------------------------------- /causalinference/__init__.py: -------------------------------------------------------------------------------- 1 | from .causal import CausalModel 2 | 3 | -------------------------------------------------------------------------------- /causalinference/causal.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from itertools import combinations_with_replacement 4 | 5 | from .core import Data, Summary, Propensity, PropensitySelect, Strata 6 | from .estimators import OLS, Blocking, Weighting, Matching, Estimators 7 | 8 | 9 | class CausalModel(object): 10 | 11 | """ 12 | Class that provides the main tools of Causal Inference. 13 | """ 14 | 15 | def __init__(self, Y, D, X): 16 | 17 | self.old_data = Data(Y, D, X) 18 | self.reset() 19 | 20 | 21 | def reset(self): 22 | 23 | """ 24 | Reinitializes data to original inputs, and drops any estimated 25 | results. 26 | """ 27 | 28 | Y, D, X = self.old_data['Y'], self.old_data['D'], self.old_data['X'] 29 | self.raw_data = Data(Y, D, X) 30 | self.summary_stats = Summary(self.raw_data) 31 | self.propensity = None 32 | self.cutoff = None 33 | self.blocks = None 34 | self.strata = None 35 | self.estimates = Estimators() 36 | 37 | 38 | def est_propensity(self, lin='all', qua=None): 39 | 40 | """ 41 | Estimates the propensity scores given list of covariates to 42 | include linearly or quadratically. 43 | 44 | The propensity score is the conditional probability of 45 | receiving the treatment given the observed covariates. 46 | Estimation is done via a logistic regression. 47 | 48 | Parameters 49 | ---------- 50 | lin: string or list, optional 51 | Column numbers (zero-based) of variables of 52 | the original covariate matrix X to include 53 | linearly. Defaults to the string 'all', which 54 | uses whole covariate matrix. 55 | qua: list, optional 56 | Tuples indicating which columns of the original 57 | covariate matrix to multiply and include. E.g., 58 | [(1,1), (2,3)] indicates squaring the 2nd column 59 | and including the product of the 3rd and 4th 60 | columns. Default is to not include any 61 | quadratic terms. 62 | """ 63 | 64 | lin_terms = parse_lin_terms(self.raw_data['K'], lin) 65 | qua_terms = parse_qua_terms(self.raw_data['K'], qua) 66 | 67 | self.propensity = Propensity(self.raw_data, lin_terms, qua_terms) 68 | self.raw_data._dict['pscore'] = self.propensity['fitted'] 69 | self._post_pscore_init() 70 | 71 | 72 | def est_propensity_s(self, lin_B=None, C_lin=1, C_qua=2.71): 73 | 74 | """ 75 | Estimates the propensity score with covariates selected using 76 | the algorithm suggested by [1]_. 77 | 78 | The propensity score is the conditional probability of 79 | receiving the treatment given the observed covariates. 80 | Estimation is done via a logistic regression. 81 | 82 | The covariate selection algorithm is based on a sequence 83 | of likelihood ratio tests. 84 | 85 | Parameters 86 | ---------- 87 | lin_B: list, optional 88 | Column numbers (zero-based) of variables of 89 | the original covariate matrix X to include 90 | linearly. Defaults to empty list, meaning 91 | every column of X is subjected to the 92 | selection algorithm. 93 | C_lin: scalar, optional 94 | Critical value used in likelihood ratio tests 95 | to decide whether candidate linear terms should 96 | be included. Defaults to 1 as in [1]_. 
97 | C_qua: scalar, optional 98 | Critical value used in likelihood ratio tests 99 | to decide whether candidate quadratic terms 100 | should be included. Defaults to 2.71 as in 101 | [1]_. 102 | 103 | References 104 | ---------- 105 | .. [1] Imbens, G. & Rubin, D. (2015). Causal Inference in 106 | Statistics, Social, and Biomedical Sciences: An 107 | Introduction. 108 | """ 109 | 110 | lin_basic = parse_lin_terms(self.raw_data['K'], lin_B) 111 | 112 | self.propensity = PropensitySelect(self.raw_data, lin_basic, 113 | C_lin, C_qua) 114 | self.raw_data._dict['pscore'] = self.propensity['fitted'] 115 | self._post_pscore_init() 116 | 117 | 118 | def trim(self): 119 | 120 | """ 121 | Trims data based on propensity score to create a subsample with 122 | better covariate balance. 123 | 124 | The default cutoff value is set to 0.1. To set a custom cutoff 125 | value, modify the object attribute named cutoff directly. 126 | 127 | This method should only be executed after the propensity score 128 | has been estimated. 129 | """ 130 | 131 | if 0 < self.cutoff <= 0.5: 132 | pscore = self.raw_data['pscore'] 133 | keep = (pscore >= self.cutoff) & (pscore <= 1-self.cutoff) 134 | Y_trimmed = self.raw_data['Y'][keep] 135 | D_trimmed = self.raw_data['D'][keep] 136 | X_trimmed = self.raw_data['X'][keep] 137 | self.raw_data = Data(Y_trimmed, D_trimmed, X_trimmed) 138 | self.raw_data._dict['pscore'] = pscore[keep] 139 | self.summary_stats = Summary(self.raw_data) 140 | self.strata = None 141 | self.estimates = Estimators() 142 | elif self.cutoff == 0: 143 | pass 144 | else: 145 | raise ValueError('Invalid cutoff.') 146 | 147 | 148 | def trim_s(self): 149 | 150 | """ 151 | Trims data based on propensity score using the cutoff 152 | selection algorithm suggested by [1]_. 153 | 154 | This method should only be executed after the propensity score 155 | has been estimated. 156 | 157 | References 158 | ---------- 159 | .. [1] Crump, R., Hotz, V., Imbens, G., & Mitnik, O. (2009). 160 | Dealing with Limited Overlap in Estimation of 161 | Average Treatment Effects. Biometrika, 96, 187-199. 162 | """ 163 | 164 | pscore = self.raw_data['pscore'] 165 | g = 1.0/(pscore*(1-pscore)) # 1 over Bernoulli variance 166 | 167 | self.cutoff = select_cutoff(g) 168 | self.trim() 169 | 170 | 171 | def stratify(self): 172 | 173 | """ 174 | Stratifies the sample based on propensity score. 175 | 176 | By default the sample is divided into five equal-sized bins. 177 | The number of bins can be set by modifying the object 178 | attribute named blocks. Alternatively, custom-sized bins can 179 | be created by setting blocks equal to a sorted list of numbers 180 | between 0 and 1 indicating the bin boundaries. 181 | 182 | This method should only be executed after the propensity score 183 | has been estimated. 
184 | """ 185 | 186 | Y, D, X = self.raw_data['Y'], self.raw_data['D'], self.raw_data['X'] 187 | pscore = self.raw_data['pscore'] 188 | 189 | if isinstance(self.blocks, int): 190 | blocks = split_equal_bins(pscore, self.blocks) 191 | else: 192 | blocks = self.blocks[:] # make a copy; should be sorted 193 | blocks[0] = 0 # avoids always dropping 1st unit 194 | 195 | def subset(p_low, p_high): 196 | return (p_low < pscore) & (pscore <= p_high) 197 | subsets = [subset(*ps) for ps in zip(blocks, blocks[1:])] 198 | strata = [CausalModel(Y[s], D[s], X[s]) for s in subsets] 199 | self.strata = Strata(strata, subsets, pscore) 200 | 201 | 202 | def stratify_s(self): 203 | 204 | """ 205 | Stratifies the sample based on propensity score using the 206 | bin selection procedure suggested by [1]_. 207 | 208 | The bin selection algorithm is based on a sequence of 209 | two-sample t tests performed on the log-odds ratio. 210 | 211 | This method should only be executed after the propensity score 212 | has been estimated. 213 | 214 | References 215 | ---------- 216 | .. [1] Imbens, G. & Rubin, D. (2015). Causal Inference in 217 | Statistics, Social, and Biomedical Sciences: An 218 | Introduction. 219 | """ 220 | 221 | pscore_order = self.raw_data['pscore'].argsort() 222 | pscore = self.raw_data['pscore'][pscore_order] 223 | D = self.raw_data['D'][pscore_order] 224 | logodds = np.log(pscore / (1-pscore)) 225 | K = self.raw_data['K'] 226 | 227 | blocks_uniq = set(select_blocks(pscore, logodds, D, K, 0, 1)) 228 | self.blocks = sorted(blocks_uniq) 229 | self.stratify() 230 | 231 | 232 | def est_via_ols(self, adj=2): 233 | 234 | """ 235 | Estimates average treatment effects using least squares. 236 | 237 | Parameters 238 | ---------- 239 | adj: int (0, 1, or 2) 240 | Indicates how covariate adjustments are to be 241 | performed. Set adj = 0 to not include any 242 | covariates. Set adj = 1 to include treatment 243 | indicator D and covariates X separately. Set 244 | adj = 2 to additionally include interaction 245 | terms between D and X. Defaults to 2. 246 | """ 247 | 248 | self.estimates['ols'] = OLS(self.raw_data, adj) 249 | 250 | 251 | def est_via_blocking(self, adj=1): 252 | 253 | """ 254 | Estimates average treatment effects using regression within 255 | blocks. 256 | 257 | This method should only be executed after the sample has been 258 | stratified. 259 | 260 | Parameters 261 | ---------- 262 | adj: int (0, 1, or 2) 263 | Indicates how covariate adjustments are to be 264 | performed for each within-bin regression. 265 | Set adj = 0 to not include any covariates. 266 | Set adj = 1 to include treatment indicator D 267 | and covariates X separately. Set adj = 2 to 268 | additionally include interaction terms between 269 | D and X. Defaults to 1. 270 | """ 271 | 272 | self.estimates['blocking'] = Blocking(self.strata, adj) 273 | 274 | 275 | def est_via_weighting(self): 276 | 277 | """ 278 | Estimates average treatment effects using doubly-robust 279 | version of the Horvitz-Thompson weighting estimator. 280 | """ 281 | 282 | self.estimates['weighting'] = Weighting(self.raw_data) 283 | 284 | 285 | def est_via_matching(self, weights='inv', matches=1, bias_adj=False): 286 | 287 | """ 288 | Estimates average treatment effects using nearest- 289 | neighborhood matching. 290 | 291 | Matching is done with replacement. Method supports multiple 292 | matching. Correcting bias that arise due to imperfect matches 293 | is also supported. For details on methodology, see [1]_. 
294 | 295 | Parameters 296 | ---------- 297 | weights: str or positive definite square matrix 298 | Specifies weighting matrix used in computing 299 | distance measures. Defaults to string 'inv', 300 | which does inverse variance weighting. String 301 | 'maha' gives the weighting matrix used in the 302 | Mahalanobis metric. 303 | matches: int 304 | Number of matches to use for each subject. 305 | bias_adj: bool 306 | Specifies whether bias adjustments should be 307 | attempted. 308 | 309 | References 310 | ---------- 311 | .. [1] Imbens, G. & Rubin, D. (2015). Causal Inference in 312 | Statistics, Social, and Biomedical Sciences: An 313 | Introduction. 314 | """ 315 | 316 | X, K = self.raw_data['X'], self.raw_data['K'] 317 | X_c, X_t = self.raw_data['X_c'], self.raw_data['X_t'] 318 | 319 | if weights == 'inv': 320 | W = 1/X.var(0) 321 | elif weights == 'maha': 322 | V_c = np.cov(X_c, rowvar=False, ddof=0) 323 | V_t = np.cov(X_t, rowvar=False, ddof=0) 324 | if K == 1: 325 | W = 1/np.array([[(V_c+V_t)/2]]) # matrix form 326 | else: 327 | W = np.linalg.inv((V_c+V_t)/2) 328 | else: 329 | W = weights 330 | 331 | self.estimates['matching'] = Matching(self.raw_data, W, 332 | matches, bias_adj) 333 | 334 | 335 | def _post_pscore_init(self): 336 | 337 | self.cutoff = 0.1 338 | self.blocks = 5 339 | 340 | 341 | def parse_lin_terms(K, lin): 342 | 343 | if lin is None: 344 | return [] 345 | elif lin == 'all': 346 | return range(K) 347 | else: 348 | return lin 349 | 350 | 351 | def parse_qua_terms(K, qua): 352 | 353 | if qua is None: 354 | return [] 355 | elif qua == 'all': 356 | return list(combinations_with_replacement(range(K), 2)) 357 | else: 358 | return qua 359 | 360 | 361 | def sumlessthan(g, sorted_g, cumsum): 362 | 363 | deduped_values = dict(zip(sorted_g, cumsum)) 364 | 365 | return np.array([deduped_values[x] for x in g]) 366 | 367 | 368 | def select_cutoff(g): 369 | 370 | if g.max() <= 2*g.mean(): 371 | cutoff = 0 372 | else: 373 | sorted_g = np.sort(g) 374 | cumsum_1 = range(1, len(g)+1) 375 | LHS = g * sumlessthan(g, sorted_g, cumsum_1) 376 | cumsum_g = np.cumsum(sorted_g) 377 | RHS = 2 * sumlessthan(g, sorted_g, cumsum_g) 378 | gamma = np.max(g[LHS <= RHS]) 379 | cutoff = 0.5 - np.sqrt(0.25 - 1./gamma) 380 | 381 | return cutoff 382 | 383 | 384 | def split_equal_bins(pscore, blocks): 385 | 386 | q = np.linspace(0, 100, blocks+1)[1:-1] # q as in qth centiles 387 | centiles = [np.percentile(pscore, x) for x in q] 388 | 389 | return [0] + centiles + [1] 390 | 391 | 392 | def calc_tstat(sample_c, sample_t): 393 | 394 | N_c = sample_c.shape[0] 395 | N_t = sample_t.shape[0] 396 | var_c = sample_c.var(ddof=1) 397 | var_t = sample_t.var(ddof=1) 398 | 399 | return (sample_t.mean()-sample_c.mean()) / np.sqrt(var_c/N_c+var_t/N_t) 400 | 401 | 402 | def calc_sample_sizes(D): 403 | 404 | N = D.shape[0] 405 | mid_index = N // 2 406 | 407 | Nleft = mid_index 408 | Nleft_t = D[:mid_index].sum() 409 | Nleft_c = Nleft - Nleft_t 410 | 411 | Nright = N - Nleft 412 | Nright_t = D[mid_index:].sum() 413 | Nright_c = Nright - Nright_t 414 | 415 | return (Nleft_c, Nleft_t, Nright_c, Nright_t) 416 | 417 | 418 | def select_blocks(pscore, logodds, D, K, p_low, p_high): 419 | 420 | scope = (pscore >= p_low) & (pscore <= p_high) 421 | c, t = (scope & (D==0)), (scope & (D==1)) 422 | 423 | Nleft_c, Nleft_t, Nright_c, Nright_t = calc_sample_sizes(D[scope]) 424 | if min(Nleft_c, Nleft_t, Nright_c, Nright_t) < K+1: 425 | return [p_low, p_high] 426 | 427 | tstat = calc_tstat(logodds[c], logodds[t]) 428 | if tstat <= 1.96: 
429 | return [p_low, p_high] 430 | 431 | low = pscore[scope][0] 432 | mid = pscore[scope][scope.sum() // 2] 433 | high = pscore[scope][-1] 434 | 435 | return select_blocks(pscore, logodds, D, K, low, mid) + \ 436 | select_blocks(pscore, logodds, D, K, mid, high) 437 | 438 | -------------------------------------------------------------------------------- /causalinference/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import Dict, Data 2 | from .summary import Summary 3 | from .propensity import Propensity, PropensitySelect 4 | from .strata import Strata 5 | 6 | -------------------------------------------------------------------------------- /causalinference/core/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Dict(object): 5 | 6 | """ 7 | Dictionary-mimicking class. 8 | """ 9 | 10 | def __getitem__(self, key): 11 | 12 | return self._dict[key] 13 | 14 | 15 | def __iter__(self): 16 | 17 | return iter(self._dict) 18 | 19 | 20 | def __repr__(self): 21 | 22 | return self._dict.__repr__() 23 | 24 | 25 | def keys(self): 26 | 27 | return self._dict.keys() 28 | 29 | 30 | def iteritems(self): 31 | 32 | return self._dict.iteritems() 33 | 34 | 35 | def get(self, key, default=None): 36 | 37 | return self._dict.get(key, default) 38 | 39 | 40 | class Data(Dict): 41 | 42 | """ 43 | Dictionary-like class containing basic data. 44 | """ 45 | 46 | def __init__(self, outcome, treatment, covariates): 47 | 48 | Y, D, X = preprocess(outcome, treatment, covariates) 49 | self._dict = dict() 50 | self._dict['Y'] = Y 51 | self._dict['D'] = D 52 | self._dict['X'] = X 53 | self._dict['N'], self._dict['K'] = X.shape 54 | self._dict['controls'] = (D==0) 55 | self._dict['treated'] = (D==1) 56 | self._dict['Y_c'] = Y[self._dict['controls']] 57 | self._dict['Y_t'] = Y[self._dict['treated']] 58 | self._dict['X_c'] = X[self._dict['controls']] 59 | self._dict['X_t'] = X[self._dict['treated']] 60 | self._dict['N_t'] = D.sum() 61 | self._dict['N_c'] = self._dict['N'] - self._dict['N_t'] 62 | if self._dict['K']+1 > self._dict['N_c']: 63 | raise ValueError('Too few control units: N_c < K+1') 64 | if self._dict['K']+1 > self._dict['N_t']: 65 | raise ValueError('Too few treated units: N_t < K+1') 66 | 67 | 68 | def preprocess(Y, D, X): 69 | 70 | if Y.shape[0] == D.shape[0] == X.shape[0]: 71 | N = Y.shape[0] 72 | else: 73 | raise IndexError('Input data have different number of rows') 74 | 75 | if Y.shape != (N, ): 76 | Y.shape = (N, ) 77 | if D.shape != (N, ): 78 | D.shape = (N, ) 79 | if D.dtype != 'int': 80 | D = D.astype(int) 81 | if X.shape == (N, ): 82 | X.shape = (N, 1) 83 | 84 | return (Y, D, X) 85 | 86 | -------------------------------------------------------------------------------- /causalinference/core/propensity.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from scipy.optimize import fmin_bfgs 4 | from itertools import combinations_with_replacement 5 | 6 | import causalinference.utils.tools as tools 7 | from .data import Dict 8 | 9 | 10 | class Propensity(Dict): 11 | 12 | """ 13 | Dictionary-like class containing propensity score data. 
14 | 15 | Propensity score related data includes estimated logistic regression 16 | coefficients, maximized log-likelihood, predicted propensity scores, 17 | and lists of the linear and quadratic terms that are included in the 18 | logistic regression. 19 | """ 20 | 21 | def __init__(self, data, lin, qua): 22 | 23 | Z = form_matrix(data['X'], lin, qua) 24 | Z_c, Z_t = Z[data['controls']], Z[data['treated']] 25 | beta = calc_coef(Z_c, Z_t) 26 | 27 | self._data = data 28 | self._dict = dict() 29 | self._dict['lin'], self._dict['qua'] = lin, qua 30 | self._dict['coef'] = beta 31 | self._dict['loglike'] = -neg_loglike(beta, Z_c, Z_t) 32 | self._dict['fitted'] = sigmoid(Z.dot(beta)) 33 | self._dict['se'] = calc_se(Z, self._dict['fitted']) 34 | 35 | 36 | def __str__(self): 37 | 38 | table_width = 80 39 | 40 | coefs = self._dict['coef'] 41 | ses = self._dict['se'] 42 | 43 | output = '\n' 44 | output += 'Estimated Parameters of Propensity Score\n\n' 45 | 46 | entries1 = ['', 'Coef.', 'S.e.', 'z', 'P>|z|', 47 | '[95% Conf. int.]'] 48 | entry_types1 = ['string']*6 49 | col_spans1 = [1]*5 + [2] 50 | output += tools.add_row(entries1, entry_types1, 51 | col_spans1, table_width) 52 | output += tools.add_line(table_width) 53 | 54 | entries2 = tools.gen_reg_entries('Intercept', coefs[0], ses[0]) 55 | entry_types2 = ['string'] + ['float']*6 56 | col_spans2 = [1]*7 57 | output += tools.add_row(entries2, entry_types2, 58 | col_spans2, table_width) 59 | 60 | lin = self._dict['lin'] 61 | for (lin_term, coef, se) in zip(lin, coefs[1:], ses[1:]): 62 | entries3 = tools.gen_reg_entries('X'+str(lin_term), 63 | coef, se) 64 | output += tools.add_row(entries3, entry_types2, 65 | col_spans2, table_width) 66 | 67 | qua = self._dict['qua'] 68 | lin_num = len(lin)+1 # including intercept 69 | for (qua_term, coef, se) in zip(qua, coefs[lin_num:], 70 | ses[lin_num:]): 71 | name = 'X'+str(qua_term[0])+'*X'+str(qua_term[1]) 72 | entries4 = tools.gen_reg_entries(name, coef, se) 73 | output += tools.add_row(entries4, entry_types2, 74 | col_spans2, table_width) 75 | 76 | return output 77 | 78 | 79 | class PropensitySelect(Propensity): 80 | 81 | """ 82 | Dictionary-like class containing propensity score data. 83 | 84 | Propensity score related data includes estimated logistic regression 85 | coefficients, maximized log-likelihood, predicted propensity scores, 86 | and lists of the linear and quadratic terms that are included in the 87 | logistic regression. 
88 | """ 89 | 90 | def __init__(self, data, lin_B, C_lin, C_qua): 91 | 92 | X_c, X_t = data['X_c'], data['X_t'] 93 | lin = select_lin_terms(X_c, X_t, lin_B, C_lin) 94 | qua = select_qua_terms(X_c, X_t, lin, C_qua) 95 | 96 | super(PropensitySelect, self).__init__(data, lin, qua) 97 | 98 | 99 | def form_matrix(X, lin, qua): 100 | 101 | N, K = X.shape 102 | 103 | mat = np.empty((N, 1+len(lin)+len(qua))) 104 | mat[:, 0] = 1 # constant term 105 | 106 | current_col = 1 107 | if lin: 108 | mat[:, current_col:current_col+len(lin)] = X[:, lin] 109 | current_col += len(lin) 110 | for term in qua: # qua is a list of tuples of column numbers 111 | mat[:, current_col] = X[:, term[0]] * X[:, term[1]] 112 | current_col += 1 113 | 114 | return mat 115 | 116 | 117 | def sigmoid(x, top_threshold=100, bottom_threshold=-100): 118 | 119 | high_x = (x >= top_threshold) 120 | low_x = (x <= bottom_threshold) 121 | mid_x = ~(high_x | low_x) 122 | 123 | values = np.empty(x.shape[0]) 124 | values[high_x] = 1.0 125 | values[low_x] = 0.0 126 | values[mid_x] = 1/(1+np.exp(-x[mid_x])) 127 | 128 | return values 129 | 130 | 131 | def log1exp(x, top_threshold=100, bottom_threshold=-100): 132 | 133 | high_x = (x >= top_threshold) 134 | low_x = (x <= bottom_threshold) 135 | mid_x = ~(high_x | low_x) 136 | 137 | values = np.empty(x.shape[0]) 138 | values[high_x] = 0.0 139 | values[low_x] = -x[low_x] 140 | values[mid_x] = np.log(1 + np.exp(-x[mid_x])) 141 | 142 | return values 143 | 144 | 145 | def neg_loglike(beta, X_c, X_t): 146 | 147 | return log1exp(X_t.dot(beta)).sum() + log1exp(-X_c.dot(beta)).sum() 148 | 149 | 150 | def neg_gradient(beta, X_c, X_t): 151 | 152 | return (sigmoid(X_c.dot(beta))*X_c.T).sum(1) - \ 153 | (sigmoid(-X_t.dot(beta))*X_t.T).sum(1) 154 | 155 | 156 | def calc_coef(X_c, X_t): 157 | 158 | K = X_c.shape[1] 159 | 160 | neg_ll = lambda b: neg_loglike(b, X_c, X_t) 161 | neg_grad = lambda b: neg_gradient(b, X_c, X_t) 162 | 163 | logit = fmin_bfgs(neg_ll, np.zeros(K), neg_grad, 164 | full_output=True, disp=False) 165 | 166 | return logit[0] 167 | 168 | 169 | def calc_se(X, phat): 170 | 171 | H = np.dot(phat*(1-phat)*X.T, X) 172 | 173 | return np.sqrt(np.diag(np.linalg.inv(H))) 174 | 175 | 176 | def get_excluded_lin(K, included): 177 | 178 | included_set = set(included) 179 | 180 | return [x for x in range(K) if x not in included_set] 181 | 182 | 183 | def get_excluded_qua(lin, included): 184 | 185 | whole_set = list(combinations_with_replacement(lin, 2)) 186 | included_set = set(included) 187 | 188 | return [x for x in whole_set if x not in included_set] 189 | 190 | 191 | def calc_loglike(X_c, X_t, lin, qua): 192 | 193 | Z_c = form_matrix(X_c, lin, qua) 194 | Z_t = form_matrix(X_t, lin, qua) 195 | beta = calc_coef(Z_c, Z_t) 196 | 197 | return -neg_loglike(beta, Z_c, Z_t) 198 | 199 | 200 | def select_lin(X_c, X_t, lin_B, C_lin): 201 | 202 | # Selects, through a sequence of likelihood ratio tests, the 203 | # variables that should be included linearly in propensity 204 | # score estimation. 
205 | 206 | K = X_c.shape[1] 207 | excluded = get_excluded_lin(K, lin_B) 208 | if excluded == []: 209 | return lin_B 210 | 211 | ll_null = calc_loglike(X_c, X_t, lin_B, []) 212 | 213 | def lr_stat_lin(lin_term): 214 | ll_alt = calc_loglike(X_c, X_t, lin_B+[lin_term], []) 215 | return 2 * (ll_alt - ll_null) 216 | 217 | lr_stats = np.array([lr_stat_lin(term) for term in excluded]) 218 | argmax_lr = lr_stats.argmax() 219 | 220 | if lr_stats[argmax_lr] < C_lin: 221 | return lin_B 222 | else: 223 | new_term = [excluded[argmax_lr]] 224 | return select_lin(X_c, X_t, lin_B+new_term, C_lin) 225 | 226 | 227 | def select_lin_terms(X_c, X_t, lin_B, C_lin): 228 | 229 | # Mostly a wrapper around function select_lin to handle cases that 230 | # require little computation. 231 | 232 | if C_lin <= 0: 233 | K = X_c.shape[1] 234 | return lin_B + get_excluded_lin(K, lin_B) 235 | elif C_lin == np.inf: 236 | return lin_B 237 | else: 238 | return select_lin(X_c, X_t, lin_B, C_lin) 239 | 240 | 241 | def select_qua(X_c, X_t, lin, qua_B, C_qua): 242 | 243 | # Selects, through a sequence of likelihood ratio tests, the 244 | # variables that should be included quadratically in propensity 245 | # score estimation. 246 | 247 | excluded = get_excluded_qua(lin, qua_B) 248 | if excluded == []: 249 | return qua_B 250 | 251 | ll_null = calc_loglike(X_c, X_t, lin, qua_B) 252 | 253 | def lr_stat_qua(qua_term): 254 | ll_alt = calc_loglike(X_c, X_t, lin, qua_B+[qua_term]) 255 | return 2 * (ll_alt - ll_null) 256 | 257 | lr_stats = np.array([lr_stat_qua(term) for term in excluded]) 258 | argmax_lr = lr_stats.argmax() 259 | 260 | if lr_stats[argmax_lr] < C_qua: 261 | return qua_B 262 | else: 263 | new_term = [excluded[argmax_lr]] 264 | return select_qua(X_c, X_t, lin, qua_B+new_term, C_qua) 265 | 266 | 267 | def select_qua_terms(X_c, X_t, lin, C_qua): 268 | 269 | # Mostly a wrapper around function select_qua to handle cases that 270 | # require little computation. 271 | 272 | if lin == []: 273 | return [] 274 | if C_qua <= 0: 275 | return get_excluded_qua(lin, []) 276 | elif C_qua == np.inf: 277 | return [] 278 | else: 279 | return select_qua(X_c, X_t, lin, [], C_qua) 280 | 281 | -------------------------------------------------------------------------------- /causalinference/core/strata.py: -------------------------------------------------------------------------------- 1 | import causalinference.utils.tools as tools 2 | 3 | 4 | class Strata(object): 5 | 6 | """ 7 | List-like object containing the stratified propensity bins. 8 | """ 9 | 10 | def __init__(self, strata, subsets, pscore): 11 | 12 | self._strata = strata 13 | for stratum, subset in zip(self._strata, subsets): 14 | pscore_sub = pscore[subset] 15 | stratum.raw_data._dict['pscore'] = pscore_sub 16 | D_sub = stratum.raw_data['D'] 17 | pscore_sub_c = pscore_sub[D_sub==0] 18 | pscore_sub_t = pscore_sub[D_sub==1] 19 | stratum.summary_stats._summarize_pscore(pscore_sub_c, 20 | pscore_sub_t) 21 | 22 | 23 | def __len__(self): 24 | 25 | return len(self._strata) 26 | 27 | 28 | def __getitem__(self, index): 29 | 30 | return self._strata[index] 31 | 32 | 33 | def __str__(self): 34 | 35 | table_width = 80 36 | 37 | output = '\n' 38 | output += 'Stratification Summary\n\n' 39 | 40 | entries1 = ['', 'Propensity Score', 'Sample Size', 41 | 'Ave. 
Propensity', 'Outcome'] 42 | entry_types1 = ['string']*5 43 | col_spans1 = [1, 2, 2, 2, 1] 44 | output += tools.add_row(entries1, entry_types1, 45 | col_spans1, table_width) 46 | 47 | entries2 = ['Stratum', 'Min.', 'Max.', 'Controls', 'Treated', 48 | 'Controls', 'Treated', 'Raw-diff'] 49 | entry_types2 = ['string']*8 50 | col_spans2 = [1]*8 51 | output += tools.add_row(entries2, entry_types2, 52 | col_spans2, table_width) 53 | output += tools.add_line(table_width) 54 | 55 | strata = self._strata 56 | entry_types3 = ['integer', 'float', 'float', 'integer', 57 | 'integer', 'float', 'float', 'float'] 58 | for i in range(len(strata)): 59 | summary = strata[i].summary_stats 60 | N_c, N_t = summary['N_c'], summary['N_t'] 61 | p_min, p_max = summary['p_min'], summary['p_max'] 62 | p_c_mean = summary['p_c_mean'] 63 | p_t_mean = summary['p_t_mean'] 64 | within = summary['rdiff'] 65 | entries3 = [i+1, p_min, p_max, N_c, N_t, 66 | p_c_mean, p_t_mean, within] 67 | output += tools.add_row(entries3, entry_types3, 68 | col_spans2, table_width) 69 | 70 | return output 71 | 72 | -------------------------------------------------------------------------------- /causalinference/core/summary.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | import causalinference.utils.tools as tools 5 | from .data import Dict 6 | 7 | 8 | class Summary(Dict): 9 | 10 | """ 11 | Dictionary-like class containing summary statistics for input data. 12 | 13 | One of the summary statistics is the normalized difference between 14 | covariates. Large values indicate that simple linear adjustment methods 15 | may not be adequate for removing biases that are associated with 16 | differences in covariates. 17 | """ 18 | 19 | def __init__(self, data): 20 | 21 | self._dict = dict() 22 | 23 | self._dict['N'], self._dict['K'] = data['N'], data['K'] 24 | self._dict['N_c'], self._dict['N_t'] = data['N_c'], data['N_t'] 25 | self._dict['Y_c_mean'] = data['Y_c'].mean() 26 | self._dict['Y_t_mean'] = data['Y_t'].mean() 27 | self._dict['Y_c_sd'] = np.sqrt(data['Y_c'].var(ddof=1)) 28 | self._dict['Y_t_sd'] = np.sqrt(data['Y_t'].var(ddof=1)) 29 | self._dict['rdiff'] = self['Y_t_mean'] - self['Y_c_mean'] 30 | self._dict['X_c_mean'] = data['X_c'].mean(0) 31 | self._dict['X_t_mean'] = data['X_t'].mean(0) 32 | self._dict['X_c_sd'] = np.sqrt(data['X_c'].var(0, ddof=1)) 33 | self._dict['X_t_sd'] = np.sqrt(data['X_t'].var(0, ddof=1)) 34 | self._dict['ndiff'] = calc_ndiff(self['X_c_mean'], 35 | self['X_t_mean'], 36 | self['X_c_sd'], 37 | self['X_t_sd']) 38 | 39 | 40 | def _summarize_pscore(self, pscore_c, pscore_t): 41 | 42 | """ 43 | Called by Strata class during initialization. 
44 | """ 45 | 46 | self._dict['p_min'] = min(pscore_c.min(), pscore_t.min()) 47 | self._dict['p_max'] = max(pscore_c.max(), pscore_t.max()) 48 | self._dict['p_c_mean'] = pscore_c.mean() 49 | self._dict['p_t_mean'] = pscore_t.mean() 50 | 51 | 52 | def __str__(self): 53 | 54 | table_width = 80 55 | 56 | N_c, N_t, K = self['N_c'], self['N_t'], self['K'] 57 | Y_c_mean, Y_t_mean = self['Y_c_mean'], self['Y_t_mean'] 58 | Y_c_sd, Y_t_sd = self['Y_c_sd'], self['Y_t_sd'] 59 | X_c_mean, X_t_mean = self['X_c_mean'], self['X_t_mean'] 60 | X_c_sd, X_t_sd = self['X_c_sd'], self['X_t_sd'] 61 | rdiff, ndiff = self['rdiff'], self['ndiff'] 62 | varnames = ['X'+str(i) for i in range(K)] 63 | 64 | output = '\n' 65 | output += 'Summary Statistics\n\n' 66 | 67 | entries1 = ['', 'Controls (N_c='+str(N_c)+')', 68 | 'Treated (N_t='+str(N_t)+')', ''] 69 | entry_types1 = ['string']*4 70 | col_spans1 = [1, 2, 2, 1] 71 | output += tools.add_row(entries1, entry_types1, 72 | col_spans1, table_width) 73 | 74 | entries2 = ['Variable', 'Mean', 'S.d.', 75 | 'Mean', 'S.d.', 'Raw-diff'] 76 | entry_types2 = ['string']*6 77 | col_spans2 = [1]*6 78 | output += tools.add_row(entries2, entry_types2, 79 | col_spans2, table_width) 80 | output += tools.add_line(table_width) 81 | 82 | entries3 = ['Y', Y_c_mean, Y_c_sd, Y_t_mean, Y_t_sd, rdiff] 83 | entry_types3 = ['string'] + ['float']*5 84 | col_spans3 = [1]*6 85 | output += tools.add_row(entries3, entry_types3, 86 | col_spans3, table_width) 87 | 88 | output += '\n' 89 | output += tools.add_row(entries1, entry_types1, 90 | col_spans1, table_width) 91 | 92 | entries4 = ['Variable', 'Mean', 'S.d.', 93 | 'Mean', 'S.d.', 'Nor-diff'] 94 | output += tools.add_row(entries4, entry_types2, 95 | col_spans2, table_width) 96 | output += tools.add_line(table_width) 97 | 98 | entry_types5 = ['string'] + ['float']*5 99 | col_spans5 = [1]*6 100 | for entries5 in zip(varnames, X_c_mean, X_c_sd, 101 | X_t_mean, X_t_sd, ndiff): 102 | output += tools.add_row(entries5, entry_types5, 103 | col_spans5, table_width) 104 | 105 | return output 106 | 107 | 108 | def calc_ndiff(mean_c, mean_t, sd_c, sd_t): 109 | 110 | return (mean_t-mean_c) / np.sqrt((sd_c**2+sd_t**2)/2) 111 | 112 | -------------------------------------------------------------------------------- /causalinference/estimators/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Estimators 2 | from .ols import OLS 3 | from .blocking import Blocking 4 | from .weighting import Weighting 5 | from .matching import Matching 6 | 7 | -------------------------------------------------------------------------------- /causalinference/estimators/base.py: -------------------------------------------------------------------------------- 1 | import causalinference.utils.tools as tools 2 | from ..core import Dict 3 | 4 | 5 | class Estimator(Dict): 6 | 7 | """ 8 | Dictionary-like class containing treatment effect estimates. 9 | """ 10 | 11 | def __str__(self): 12 | 13 | table_width = 80 14 | 15 | names = ['ate', 'atc', 'att'] 16 | coefs = [self[name] for name in names if name in self.keys()] 17 | ses = [self[name+'_se'] for name in names if name+'_se' in self.keys()] 18 | 19 | output = '\n' 20 | output += 'Treatment Effect Estimates: ' + self._method + '\n\n' 21 | 22 | entries1 = ['', 'Est.', 'S.e.', 'z', 'P>|z|', 23 | '[95% Conf. 
int.]'] 24 | entry_types1 = ['string']*6 25 | col_spans1 = [1]*5 + [2] 26 | output += tools.add_row(entries1, entry_types1, 27 | col_spans1, table_width) 28 | output += tools.add_line(table_width) 29 | 30 | entry_types2 = ['string'] + ['float']*6 31 | col_spans2 = [1]*7 32 | for (name, coef, se) in zip(names, coefs, ses): 33 | entries2 = tools.gen_reg_entries(name.upper(), coef, se) 34 | output += tools.add_row(entries2, entry_types2, 35 | col_spans2, table_width) 36 | 37 | return output 38 | 39 | 40 | class Estimators(Dict): 41 | 42 | """ 43 | Dictionary-like class containing treatment effect estimates for each 44 | estimator used. 45 | """ 46 | 47 | def __init__(self): 48 | 49 | self._dict = {} 50 | 51 | 52 | def __setitem__(self, key, item): 53 | 54 | self._dict[key] = item 55 | 56 | 57 | def __str__(self): 58 | 59 | output = '' 60 | for method in self.keys(): 61 | output += self[method].__str__() 62 | 63 | return output 64 | 65 | -------------------------------------------------------------------------------- /causalinference/estimators/blocking.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from .base import Estimator 5 | 6 | 7 | class Blocking(Estimator): 8 | 9 | """ 10 | Dictionary-like class containing treatment effect estimates. 11 | """ 12 | 13 | def __init__(self, strata, adj): 14 | 15 | self._method = 'Blocking' 16 | for s in strata: 17 | s.est_via_ols(adj) 18 | 19 | Ns = [s.raw_data['N'] for s in strata] 20 | N_cs = [s.raw_data['N_c'] for s in strata] 21 | N_ts = [s.raw_data['N_t'] for s in strata] 22 | 23 | ates = [s.estimates['ols']['ate'] for s in strata] 24 | ate_ses = [s.estimates['ols']['ate_se'] for s in strata] 25 | if adj <= 1: 26 | atcs, atts = ates, ates 27 | atc_ses, att_ses = ate_ses, ate_ses 28 | else: 29 | atcs = [s.estimates['ols']['atc'] for s in strata] 30 | atts = [s.estimates['ols']['att'] for s in strata] 31 | atc_ses = [s.estimates['ols']['atc_se'] for s in strata] 32 | att_ses = [s.estimates['ols']['att_se'] for s in strata] 33 | 34 | self._dict = dict() 35 | self._dict['ate'] = calc_atx(ates, Ns) 36 | self._dict['atc'] = calc_atx(atcs, N_cs) 37 | self._dict['att'] = calc_atx(atts, N_ts) 38 | 39 | self._dict['ate_se'] = calc_atx_se(ate_ses, Ns) 40 | self._dict['atc_se'] = calc_atx_se(atc_ses, N_cs) 41 | self._dict['att_se'] = calc_atx_se(att_ses, N_ts) 42 | 43 | 44 | def calc_atx(atxs, Ns): 45 | 46 | N = sum(Ns) 47 | 48 | return np.sum(np.array(atxs) * np.array(Ns)) / N 49 | 50 | 51 | def calc_atx_se(atx_ses, Ns): 52 | 53 | N = sum(Ns) 54 | var = np.sum(np.array(atx_ses)**2 * np.array(Ns)**2) / N**2 55 | 56 | return np.sqrt(var) 57 | 58 | -------------------------------------------------------------------------------- /causalinference/estimators/matching.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from itertools import chain 4 | from functools import reduce 5 | 6 | from .base import Estimator 7 | 8 | 9 | class Matching(Estimator): 10 | 11 | """ 12 | Dictionary-like class containing treatment effect estimates. Standard 13 | errors are only computed when needed. 
14 | """ 15 | 16 | def __init__(self, data, W, m, bias_adj): 17 | 18 | self._method = 'Matching' 19 | N, N_c, N_t = data['N'], data['N_c'], data['N_t'] 20 | Y_c, Y_t = data['Y_c'], data['Y_t'] 21 | X_c, X_t = data['X_c'], data['X_t'] 22 | 23 | matches_c = [match(X_i, X_t, W, m) for X_i in X_c] 24 | matches_t = [match(X_i, X_c, W, m) for X_i in X_t] 25 | Yhat_c = np.array([Y_t[idx].mean() for idx in matches_c]) 26 | Yhat_t = np.array([Y_c[idx].mean() for idx in matches_t]) 27 | ITT_c = Yhat_c - Y_c 28 | ITT_t = Y_t - Yhat_t 29 | 30 | if bias_adj: 31 | bias_coefs_c = bias_coefs(matches_c, Y_t, X_t) 32 | bias_coefs_t = bias_coefs(matches_t, Y_c, X_c) 33 | bias_c = bias(X_c, X_t, matches_c, bias_coefs_c) 34 | bias_t = bias(X_t, X_c, matches_t, bias_coefs_t) 35 | ITT_c = ITT_c - bias_c 36 | ITT_t = ITT_t + bias_t 37 | 38 | self._dict = dict() 39 | self._dict['atc'] = ITT_c.mean() 40 | self._dict['att'] = ITT_t.mean() 41 | self._dict['ate'] = (N_c/N)*self['atc'] + (N_t/N)*self['att'] 42 | 43 | scaled_counts_c = scaled_counts(N_c, matches_t) 44 | scaled_counts_t = scaled_counts(N_t, matches_c) 45 | vars_c = np.repeat(ITT_c.var(), N_c) # conservative 46 | vars_t = np.repeat(ITT_t.var(), N_t) # conservative 47 | self._dict['atc_se'] = calc_atc_se(vars_c, vars_t, scaled_counts_t) 48 | self._dict['att_se'] = calc_att_se(vars_c, vars_t, scaled_counts_c) 49 | self._dict['ate_se'] = calc_ate_se(vars_c, vars_t, 50 | scaled_counts_c, 51 | scaled_counts_t) 52 | 53 | 54 | def norm(X_i, X_m, W): 55 | 56 | dX = X_m - X_i 57 | if W.ndim == 1: 58 | return (dX**2 * W).sum(1) 59 | else: 60 | return (dX.dot(W)*dX).sum(1) 61 | 62 | 63 | def smallestm(d, m): 64 | 65 | # Finds indices of the smallest m numbers in an array. Tied values are 66 | # included as well, so number of returned indices can be greater than m. 67 | 68 | # partition around (m+1)th order stat 69 | par_idx = np.argpartition(d, m) 70 | 71 | if d[par_idx[:m]].max() < d[par_idx[m]]: # m < (m+1)th 72 | return par_idx[:m] 73 | elif d[par_idx[m]] < d[par_idx[m+1:]].min(): # m+1 < (m+2)th 74 | return par_idx[:m+1] 75 | else: # mth = (m+1)th = (m+2)th, so increment and recurse 76 | return smallestm(d, m+2) 77 | 78 | 79 | def match(X_i, X_m, W, m): 80 | 81 | d = norm(X_i, X_m, W) 82 | 83 | return smallestm(d, m) 84 | 85 | 86 | def bias_coefs(matches, Y_m, X_m): 87 | 88 | # Computes OLS coefficient in bias correction regression. Constructs 89 | # data for regression by including (possibly multiple times) every 90 | # observation that has appeared in the matched sample. 91 | 92 | flat_idx = reduce(lambda x,y: np.concatenate((x,y)), matches) 93 | N, K = len(flat_idx), X_m.shape[1] 94 | 95 | Y = Y_m[flat_idx] 96 | X = np.empty((N, K+1)) 97 | X[:, 0] = 1 # intercept term 98 | X[:, 1:] = X_m[flat_idx] 99 | 100 | return np.linalg.lstsq(X, Y)[0][1:] # don't need intercept coef 101 | 102 | 103 | def bias(X, X_m, matches, coefs): 104 | 105 | # Computes bias correction term, which is approximated by the dot 106 | # product of the matching discrepancy (i.e., X-X_matched) and the 107 | # coefficients from the bias correction regression. 108 | 109 | X_m_mean = [X_m[idx].mean(0) for idx in matches] 110 | bias_list = [(X_j-X_i).dot(coefs) for X_i,X_j in zip(X, X_m_mean)] 111 | 112 | return np.array(bias_list) 113 | 114 | 115 | def scaled_counts(N, matches): 116 | 117 | # Counts the number of times each subject has appeared as a match. In 118 | # the case of multiple matches, each subject only gets partial credit. 
119 | 120 | s_counts = np.zeros(N) 121 | 122 | for matches_i in matches: 123 | scale = 1 / len(matches_i) 124 | for match in matches_i: 125 | s_counts[match] += scale 126 | 127 | return s_counts 128 | 129 | 130 | def calc_atx_var(vars_c, vars_t, weights_c, weights_t): 131 | 132 | N_c, N_t = len(vars_c), len(vars_t) 133 | summands_c = weights_c**2 * vars_c 134 | summands_t = weights_t**2 * vars_t 135 | 136 | return summands_t.sum()/N_t**2 + summands_c.sum()/N_c**2 137 | 138 | 139 | def calc_atc_se(vars_c, vars_t, scaled_counts_t): 140 | 141 | N_c, N_t = len(vars_c), len(vars_t) 142 | weights_c = np.ones(N_c) 143 | weights_t = (N_t/N_c) * scaled_counts_t 144 | 145 | var = calc_atx_var(vars_c, vars_t, weights_c, weights_t) 146 | 147 | return np.sqrt(var) 148 | 149 | 150 | def calc_att_se(vars_c, vars_t, scaled_counts_c): 151 | 152 | N_c, N_t = len(vars_c), len(vars_t) 153 | weights_c = (N_c/N_t) * scaled_counts_c 154 | weights_t = np.ones(N_t) 155 | 156 | var = calc_atx_var(vars_c, vars_t, weights_c, weights_t) 157 | 158 | return np.sqrt(var) 159 | 160 | 161 | def calc_ate_se(vars_c, vars_t, scaled_counts_c, scaled_counts_t): 162 | 163 | N_c, N_t = len(vars_c), len(vars_t) 164 | N = N_c + N_t 165 | weights_c = (N_c/N)*(1+scaled_counts_c) 166 | weights_t = (N_t/N)*(1+scaled_counts_t) 167 | 168 | var = calc_atx_var(vars_c, vars_t, weights_c, weights_t) 169 | 170 | return np.sqrt(var) 171 | 172 | -------------------------------------------------------------------------------- /causalinference/estimators/ols.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | from .base import Estimator 6 | 7 | 8 | class OLS(Estimator): 9 | 10 | """ 11 | Dictionary-like class containing treatment effect estimates. 
12 | """ 13 | 14 | def __init__(self, data, adj): 15 | 16 | self._method = 'OLS' 17 | Y, D, X = data['Y'], data['D'], data['X'] 18 | X_c, X_t = data['X_c'], data['X_t'] 19 | 20 | Z = form_matrix(D, X, adj) 21 | olscoef = np.linalg.lstsq(Z, Y)[0] 22 | u = Y - Z.dot(olscoef) 23 | cov = calc_cov(Z, u) 24 | 25 | self._dict = dict() 26 | self._dict['ate'] = calc_ate(olscoef) 27 | self._dict['ate_se'] = calc_ate_se(cov) 28 | 29 | if adj == 2: 30 | Xmean = X.mean(0) 31 | meandiff_c = X_c.mean(0) - Xmean 32 | meandiff_t = X_t.mean(0) - Xmean 33 | self._dict['atc'] = calc_atx(olscoef, meandiff_c) 34 | self._dict['att'] = calc_atx(olscoef, meandiff_t) 35 | self._dict['atc_se'] = calc_atx_se(cov, meandiff_c) 36 | self._dict['att_se'] = calc_atx_se(cov, meandiff_t) 37 | 38 | 39 | def form_matrix(D, X, adj): 40 | 41 | N, K = X.shape 42 | 43 | if adj == 0: 44 | cols = 2 45 | elif adj == 1: 46 | cols = 2+K 47 | else: 48 | cols = 2+2*K 49 | 50 | Z = np.empty((N, cols)) 51 | Z[:, 0] = 1 # intercept term 52 | Z[:, 1] = D 53 | if adj >= 1: 54 | dX = X - X.mean(0) 55 | Z[:, 2:2+K] = dX 56 | if adj == 2: 57 | Z[:, 2+K:] = D[:, None] * dX 58 | 59 | return Z 60 | 61 | 62 | def calc_ate(olscoef): 63 | 64 | return olscoef[1] # coef of treatment variable 65 | 66 | 67 | def calc_atx(olscoef, meandiff): 68 | 69 | K = (len(olscoef)-2) // 2 70 | 71 | return olscoef[1] + np.dot(meandiff, olscoef[2+K:]) 72 | 73 | 74 | def calc_cov(Z, u): 75 | 76 | A = np.linalg.inv(np.dot(Z.T, Z)) 77 | B = np.dot(u[:, None]*Z, A) 78 | 79 | return np.dot(B.T, B) 80 | 81 | 82 | def submatrix(cov): 83 | 84 | K = (cov.shape[0]-2) // 2 85 | submat = np.empty((1+K, 1+K)) 86 | submat[0,0] = cov[1,1] 87 | submat[0,1:] = cov[1,2+K:] 88 | submat[1:,0] = cov[2+K:,1] 89 | submat[1:,1:] = cov[2+K:, 2+K:] 90 | 91 | return submat 92 | 93 | 94 | def calc_ate_se(cov): 95 | 96 | return np.sqrt(cov[1,1]) 97 | 98 | 99 | def calc_atx_se(cov, meandiff): 100 | 101 | a = np.concatenate((np.array([1]), meandiff)) 102 | 103 | return np.sqrt(a.dot(submatrix(cov)).dot(a)) 104 | 105 | -------------------------------------------------------------------------------- /causalinference/estimators/weighting.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from .base import Estimator 5 | from .ols import calc_cov, calc_ate, calc_ate_se 6 | 7 | 8 | class Weighting(Estimator): 9 | 10 | """ 11 | Dictionary-like class containing treatment effect estimates. 
12 | """ 13 | 14 | def __init__(self, data): 15 | 16 | self._method = 'Weighting' 17 | Y, D, X = data['Y'], data['D'], data['X'] 18 | pscore = data['pscore'] 19 | 20 | weights = calc_weights(pscore, D) 21 | Y_w, Z_w = weigh_data(Y, D, X, weights) 22 | 23 | wlscoef = np.linalg.lstsq(Z_w, Y_w)[0] 24 | u_w = Y_w - Z_w.dot(wlscoef) 25 | cov = calc_cov(Z_w, u_w) 26 | 27 | self._dict = dict() 28 | self._dict['ate'] = calc_ate(wlscoef) 29 | self._dict['ate_se'] = calc_ate_se(cov) 30 | 31 | 32 | def calc_weights(pscore, D): 33 | 34 | N = pscore.shape[0] 35 | weights = np.empty(N) 36 | weights[D==0] = 1/(1-pscore[D==0]) 37 | weights[D==1] = 1/pscore[D==1] 38 | 39 | return weights 40 | 41 | 42 | def weigh_data(Y, D, X, weights): 43 | 44 | N, K = X.shape 45 | 46 | Y_w = weights * Y 47 | 48 | Z_w = np.empty((N,K+2)) 49 | Z_w[:,0] = weights 50 | Z_w[:,1] = weights * D 51 | Z_w[:,2:] = weights[:,None] * X 52 | 53 | return (Y_w, Z_w) 54 | 55 | -------------------------------------------------------------------------------- /causalinference/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tools import random_data, vignette_data, lalonde_data 2 | 3 | -------------------------------------------------------------------------------- /causalinference/utils/lalonde_data.txt: -------------------------------------------------------------------------------- 1 | re78 t black hisp age married nodegree educ re74 u74 re75 u75 2 | 9.93005 1 1 0 37 1 1 11 0 1 0 1 3 | 3.59589 1 0 1 22 0 1 9 0 1 0 1 4 | 24.9095 1 1 0 30 0 0 12 0 1 0 1 5 | 7.50615 1 1 0 27 0 1 11 0 1 0 1 6 | .28979 1 1 0 33 0 1 8 0 1 0 1 7 | 4.05649 1 1 0 22 0 1 9 0 1 0 1 8 | 0 1 1 0 23 0 0 12 0 1 0 1 9 | 8.47216 1 1 0 32 0 1 11 0 1 0 1 10 | 2.16402 1 1 0 22 0 0 16 0 1 0 1 11 | 12.4181 1 0 0 33 1 0 12 0 1 0 1 12 | 8.17391 1 1 0 19 0 1 9 0 1 0 1 13 | 17.0946 1 1 0 21 0 0 13 0 1 0 1 14 | 0 1 1 0 18 0 1 8 0 1 0 1 15 | 18.7399 1 1 0 27 1 1 10 0 1 0 1 16 | 3.02388 1 1 0 17 0 1 7 0 1 0 1 17 | 3.2285 1 1 0 19 0 1 10 0 1 0 1 18 | 14.5819 1 1 0 27 0 0 13 0 1 0 1 19 | 7.6934 1 1 0 23 0 1 10 0 1 0 1 20 | 10.8043 1 1 0 40 0 0 12 0 1 0 1 21 | 10.7474 1 1 0 26 0 0 12 0 1 0 1 22 | 0 1 1 0 23 0 1 11 0 1 0 1 23 | 5.1495 1 0 0 41 0 0 14 0 1 0 1 24 | 6.40895 1 0 0 38 0 1 9 0 1 0 1 25 | 1.9914 1 1 0 24 0 1 11 0 1 0 1 26 | 11.1632 1 1 0 18 0 1 10 0 1 0 1 27 | 9.643 1 1 0 29 1 1 11 0 1 0 1 28 | 9.89705 1 1 0 25 0 1 11 0 1 0 1 29 | 11.1429 1 0 1 27 0 1 10 0 1 0 1 30 | 16.218 1 1 0 17 0 1 10 0 1 0 1 31 | .9957 1 1 0 24 0 1 11 0 1 0 1 32 | 0 1 1 0 17 0 1 10 0 1 0 1 33 | 6.55159 1 1 0 48 0 1 4 0 1 0 1 34 | 1.57442 1 1 0 25 1 1 11 0 1 0 1 35 | 0 1 1 0 20 0 0 12 0 1 0 1 36 | 3.19175 1 1 0 25 0 0 12 0 1 0 1 37 | 20.5059 1 1 0 42 0 0 14 0 1 0 1 38 | 6.18188 1 1 0 25 0 1 5 0 1 0 1 39 | 5.91155 1 1 0 23 1 0 12 0 1 0 1 40 | 3.09416 1 1 0 46 1 1 8 0 1 0 1 41 | 0 1 1 0 24 0 1 10 0 1 0 1 42 | 1.25458 1 1 0 21 0 0 12 0 1 0 1 43 | 13.1888 1 0 0 19 0 1 9 0 1 0 1 44 | 8.06149 1 1 0 17 0 1 8 0 1 0 1 45 | 2.78796 1 0 1 18 1 1 8 0 1 0 1 46 | 3.97254 1 1 0 20 0 1 11 0 1 0 1 47 | 0 1 1 0 25 1 1 11 0 1 0 1 48 | 0 1 1 0 17 0 1 8 0 1 0 1 49 | 0 1 1 0 17 0 1 9 0 1 0 1 50 | 12.1874 1 1 0 25 0 1 5 0 1 0 1 51 | 4.84318 1 1 0 23 0 0 12 0 1 0 1 52 | 0 1 1 0 28 0 1 8 0 1 0 1 53 | 8.08749 1 1 0 31 1 1 11 0 1 0 1 54 | 0 1 1 0 18 0 1 11 0 1 0 1 55 | 2.34897 1 1 0 25 0 0 12 0 1 0 1 56 | .590782 1 1 0 30 1 1 11 0 1 0 1 57 | 0 1 1 0 17 0 1 10 0 1 0 1 58 | 1.06751 1 1 0 37 0 1 9 0 1 0 1 59 | 7.28499 1 1 0 41 1 1 4 0 1 0 1 60 | 13.1675 1 1 0 42 1 0 14 0 1 0 
1 61 | 1.04843 1 0 0 22 0 1 11 0 1 0 1 62 | 0 1 1 0 17 0 1 8 0 1 0 1 63 | 1.92394 1 1 0 29 0 1 8 0 1 0 1 64 | 4.66624 1 1 0 35 0 1 10 0 1 0 1 65 | .549298 1 1 0 27 0 1 11 0 1 0 1 66 | .762915 1 1 0 29 0 1 4 0 1 0 1 67 | 10.6943 1 1 0 28 0 1 9 0 1 0 1 68 | 0 1 1 0 27 0 1 11 0 1 0 1 69 | 0 1 0 0 23 0 1 7 0 1 0 1 70 | 8.54672 1 1 0 45 1 1 5 0 1 0 1 71 | 7.47966 1 1 0 29 0 0 13 0 1 0 1 72 | 0 1 1 0 27 0 1 9 0 1 0 1 73 | .647205 1 1 0 46 0 0 13 0 1 0 1 74 | 0 1 1 0 18 0 1 6 0 1 0 1 75 | 11.9658 1 1 0 25 0 0 12 0 1 0 1 76 | 9.59854 1 1 0 28 0 0 15 0 1 0 1 77 | 18.7834 1 0 0 25 0 1 11 0 1 0 1 78 | 18.6781 1 1 0 22 0 0 12 0 1 0 1 79 | 0 1 1 0 21 0 1 9 0 1 0 1 80 | 23.0056 1 1 0 40 0 1 11 0 1 0 1 81 | 6.4567 1 1 0 22 0 1 11 0 1 0 1 82 | 0 1 1 0 25 0 0 12 0 1 0 1 83 | 2.32111 1 1 0 18 0 0 12 0 1 0 1 84 | 4.94185 1 0 0 38 0 0 12 0 1 0 1 85 | 0 1 1 0 27 0 0 13 0 1 0 1 86 | 0 1 1 0 27 0 1 8 0 1 0 1 87 | 0 1 1 0 38 0 1 11 0 1 0 1 88 | 3.88128 1 0 1 23 0 1 8 0 1 0 1 89 | 17.231 1 1 0 26 0 1 11 0 1 0 1 90 | 8.0486 1 0 0 21 0 0 12 0 1 0 1 91 | 0 1 1 0 25 0 1 8 0 1 0 1 92 | 14.5099 1 1 0 31 1 1 11 0 1 0 1 93 | 0 1 1 0 17 0 1 10 0 1 0 1 94 | 0 1 1 0 25 0 1 11 0 1 0 1 95 | 9.98378 1 1 0 21 0 0 12 0 1 0 1 96 | 0 1 1 0 44 0 1 11 0 1 0 1 97 | 5.5875 1 0 0 25 0 0 12 0 1 0 1 98 | 4.48285 1 1 0 18 0 1 9 0 1 0 1 99 | 2.45615 1 1 0 42 0 0 12 0 1 0 1 100 | 0 1 1 0 25 0 1 10 0 1 0 1 101 | 26.8176 1 0 1 31 0 1 9 0 1 0 1 102 | 0 1 1 0 24 0 1 10 0 1 0 1 103 | 9.26579 1 1 0 26 0 1 10 0 1 0 1 104 | .48523 1 1 0 25 0 1 11 0 1 0 1 105 | 4.81463 1 1 0 18 0 1 11 0 1 0 1 106 | 7.45811 1 1 0 19 0 1 11 0 1 0 1 107 | 0 1 1 0 43 0 1 9 0 1 0 1 108 | 34.0993 1 1 0 27 0 0 13 0 1 0 1 109 | 1.95327 1 1 0 17 0 1 9 0 1 0 1 110 | 0 1 1 0 30 0 1 11 0 1 0 1 111 | 0 1 1 0 26 1 1 10 2.028 0 0 1 112 | 8.88167 1 1 0 20 0 1 9 6.08399 0 0 1 113 | 6.21067 1 0 1 17 0 1 9 .44517 0 .0743435 0 114 | 0 1 1 0 20 0 0 12 .989268 0 .165208 0 115 | .929884 1 1 0 18 0 1 11 .858254 0 .214564 0 116 | 0 1 1 0 27 1 0 12 3.67087 0 .334049 0 117 | 12.558 1 0 0 21 0 0 12 3.67087 0 .334049 0 118 | 22.1633 1 1 0 27 0 0 12 2.14341 0 .35795 0 119 | 1.65264 1 1 0 20 0 0 12 0 1 .377569 0 120 | 8.124721 1 1 0 19 0 1 10 0 1 .385274 0 121 | .6713319 1 1 0 23 0 0 12 5.50631 0 .501074 0 122 | 17.815 1 1 0 29 0 0 14 0 1 .679673 0 123 | 9.73715 1 1 0 18 0 1 10 0 1 .798908 0 124 | 17.6852 1 1 0 19 0 1 9 0 1 .798908 0 125 | 0 1 0 0 27 1 0 13 9.38157 0 .853723 0 126 | 4.32171 1 0 0 18 0 1 11 3.67823 0 .919558 0 127 | 1.77342 1 1 0 27 1 1 9 0 1 .934445 0 128 | 0 1 1 0 22 0 0 12 5.60585 0 .936177 0 129 | 11.2333 1 1 0 23 1 1 10 0 1 .936439 0 130 | .559443 1 0 1 23 0 0 12 9.38574 0 1.11744 0 131 | 1.08544 1 1 0 20 0 1 11 3.6375 0 1.22084 0 132 | 5.4452 1 1 0 17 0 1 9 1.71651 0 1.25344 0 133 | 60.3079 1 1 0 28 0 1 11 0 1 1.28408 0 134 | 1.46036 1 1 0 26 1 1 11 0 1 1.39285 0 135 | 6.94334 1 1 0 20 0 1 11 16.3186 0 1.48499 0 136 | 4.03271 1 1 0 24 1 1 11 .824389 0 1.66611 0 137 | 10.3633 1 1 0 31 0 1 9 0 1 1.69861 0 138 | 4.23231 1 0 0 23 1 1 8 0 1 1.71315 0 139 | 11.1414 1 1 0 18 0 1 10 2.14341 0 1.78427 0 140 | 0 1 1 0 29 0 0 12 10.8819 0 1.81728 0 141 | 13.3859 1 0 0 26 0 1 11 0 1 2.22627 0 142 | 4.84956 1 1 0 24 0 1 9 9.1547 0 2.28868 0 143 | 0 1 1 0 25 0 0 12 14.4268 0 2.40927 0 144 | 1.66051 1 1 0 24 0 1 10 4.2504 0 2.42195 0 145 | 0 1 1 0 46 0 1 8 3.16566 0 2.59472 0 146 | 2.48455 1 0 0 31 0 0 12 0 1 2.61122 0 147 | 4.1466 1 1 0 19 0 1 11 2.30503 0 2.61528 0 148 | 9.970679 1 1 0 19 0 1 8 0 1 2.65706 0 149 | 0 1 1 0 27 0 1 11 2.20694 0 2.66627 0 150 | 26.3723 1 1 0 26 1 1 11 0 1 
2.75465 0 151 | 5.61519 1 1 0 20 0 1 10 5.00573 0 2.77736 0 152 | 3.19657 1 1 0 28 0 1 10 0 1 2.83651 0 153 | 6.16768 1 1 0 24 0 0 12 13.7658 0 2.84276 0 154 | 7.53594 1 1 0 19 0 1 8 2.63635 0 2.93726 0 155 | 8.484241 1 1 0 23 0 0 12 6.26934 0 3.03996 0 156 | 1.29441 1 1 0 42 1 1 9 0 1 3.05853 0 157 | 0 1 1 0 25 0 0 13 12.3629 0 3.09073 0 158 | 5.01034 1 1 0 18 0 1 9 0 1 3.28738 0 159 | 9.37104 1 1 0 21 0 0 12 6.47368 0 3.33241 0 160 | 0 1 1 0 27 0 1 10 1.00115 0 3.55008 0 161 | 4.27961 1 1 0 21 0 1 8 .989268 0 3.6959 0 162 | 3.46256 1 1 0 22 0 1 9 2.19288 0 3.83699 0 163 | 7.38255 1 1 0 31 0 1 4 8.51759 0 4.02321 0 164 | 0 1 1 0 24 1 1 10 11.7032 0 4.07815 0 165 | 0 1 1 0 29 0 1 10 0 1 4.39895 0 166 | 10.9765 1 1 0 29 0 0 12 9.748389 0 4.87894 0 167 | 13.8296 1 0 0 19 0 1 10 0 1 5.32411 0 168 | 6.78846 1 0 1 19 1 1 11 5.42449 0 5.4638 0 169 | 9.5585 1 1 0 31 0 1 9 10.717 0 5.51784 0 170 | 13.2283 1 1 0 22 1 1 10 1.46835 0 5.58866 0 171 | .743667 1 1 0 21 0 1 9 6.41647 0 5.74933 0 172 | 5.52279 1 1 0 17 0 1 10 1.29147 0 5.79385 0 173 | 1.42494 1 1 0 26 1 0 12 8.40876 0 5.79483 0 174 | 1.35864 1 0 1 20 0 1 9 12.2608 0 5.87505 0 175 | 0 1 1 0 19 0 1 10 4.12195 0 6.05675 0 176 | .672877 1 1 0 26 0 1 10 25.9297 0 6.78896 0 177 | 0 1 1 0 28 0 1 11 1.92903 0 6.87186 0 178 | 10.0928 1 0 1 22 1 0 12 .492231 0 7.0557 0 179 | 6.28143 1 1 0 33 0 1 11 0 1 7.86792 0 180 | 12.5907 1 0 0 22 0 0 12 6.75999 0 8.4555 0 181 | 5.11201 1 0 1 29 0 1 10 0 1 8.85367 0 182 | 15.9526 1 1 0 33 1 0 12 20.28 0 10.9414 0 183 | 36.647 1 1 0 25 1 0 14 35.0401 0 11.5366 0 184 | 12.804 1 1 0 35 1 1 9 13.6024 0 13.8306 0 185 | 3.78663 1 1 0 35 1 1 8 13.7321 0 17.9762 0 186 | 4.18194 1 1 0 33 1 1 11 14.6607 0 25.1422 0 187 | 0 0 1 0 23 0 1 10 0 1 0 1 188 | 12.3837 0 0 0 26 0 0 12 0 1 0 1 189 | 0 0 1 0 22 0 1 9 0 1 0 1 190 | 10.7401 0 1 0 18 0 1 9 0 1 0 1 191 | 11.7965 0 1 0 45 0 1 11 0 1 0 1 192 | 9.22705 0 1 0 18 0 1 9 0 1 0 1 193 | 10.5693 0 0 0 24 0 1 8 0 1 0 1 194 | 6.04034 0 1 0 34 1 1 11 0 1 0 1 195 | 3.88083 0 0 1 24 0 1 4 0 1 0 1 196 | 0 0 1 0 36 0 1 10 0 1 0 1 197 | 5.77506 0 1 0 21 0 0 14 0 1 0 1 198 | 0 0 1 0 28 0 1 9 0 1 0 1 199 | 0 0 1 0 27 1 1 7 0 1 0 1 200 | 0 0 0 0 19 0 1 11 0 1 0 1 201 | 0 0 1 0 20 0 1 8 0 1 0 1 202 | 2.11372 0 1 0 34 0 0 12 0 1 0 1 203 | 7.61864 0 1 0 24 0 1 10 0 1 0 1 204 | 9.92095 0 0 1 22 0 1 8 0 1 0 1 205 | 4.19638 0 1 0 25 0 1 11 0 1 0 1 206 | 0 0 1 0 39 0 1 9 0 1 0 1 207 | 16.6583 0 1 0 19 1 1 9 0 1 0 1 208 | 9.722 0 1 0 44 0 1 9 0 1 0 1 209 | 3.78366 0 1 0 27 0 1 8 0 1 0 1 210 | 3.51593 0 1 0 25 0 1 8 0 1 0 1 211 | 17.0146 0 1 0 31 0 1 10 0 1 0 1 212 | 0 0 1 0 34 1 1 10 0 1 0 1 213 | 0 0 0 1 21 0 1 7 0 1 0 1 214 | 5.97026 0 1 0 33 0 0 12 0 1 0 1 215 | 1.85917 0 0 1 18 0 1 10 0 1 0 1 216 | 6.19194 0 1 0 26 1 0 12 0 1 0 1 217 | 7.28439 0 1 0 31 0 0 12 0 1 0 1 218 | .445831 0 1 0 35 0 1 10 0 1 0 1 219 | 0 0 1 0 20 0 0 12 0 1 0 1 220 | 0 0 1 0 25 0 1 11 0 1 0 1 221 | 7.36704 0 1 0 25 0 1 10 0 1 0 1 222 | 0 0 1 0 35 0 1 11 0 1 0 1 223 | 2.0155 0 1 0 20 0 1 10 0 1 0 1 224 | 15.7911 0 0 1 25 0 1 9 0 1 0 1 225 | 1.13547 0 1 0 27 0 1 10 0 1 0 1 226 | 6.37872 0 0 1 20 0 1 11 0 1 0 1 227 | 7.17619 0 1 0 26 0 1 11 0 1 0 1 228 | 0 0 1 0 38 0 1 8 0 1 0 1 229 | 7.95254 0 1 0 34 0 1 10 0 1 0 1 230 | 0 0 1 0 19 0 0 12 0 1 0 1 231 | 7.15213 0 1 0 32 0 1 8 0 1 0 1 232 | 8.329821 0 0 1 20 0 1 9 0 1 0 1 233 | 0 0 1 0 23 0 1 10 0 1 0 1 234 | 12.4299 0 1 0 38 0 1 10 0 1 0 1 235 | 0 0 1 0 24 0 1 11 0 1 0 1 236 | 5.08876 0 1 0 23 0 1 11 0 1 0 1 237 | 4.37404 0 0 1 20 0 1 7 0 1 0 1 238 | 1.55329 0 1 0 21 0 1 
11 0 1 0 1 239 | 0 0 1 0 25 0 1 10 0 1 0 1 240 | 1.6983 0 1 0 22 1 1 11 0 1 0 1 241 | 0 0 1 0 23 0 1 11 0 1 0 1 242 | 11.2946 0 0 0 24 0 0 12 0 1 0 1 243 | 0 0 1 0 29 0 1 11 0 1 0 1 244 | 14.6264 0 1 0 24 0 1 11 0 1 0 1 245 | 12.8984 0 1 0 22 0 1 9 0 1 0 1 246 | 5.76713 0 1 0 28 0 1 11 0 1 0 1 247 | 6.52792 0 0 1 18 0 1 10 0 1 0 1 248 | 3.93124 0 1 0 26 0 1 10 0 1 0 1 249 | 20.9422 0 1 0 25 0 1 10 0 1 0 1 250 | 0 0 1 0 24 0 1 10 0 1 0 1 251 | 0 0 1 0 26 0 1 5 0 1 0 1 252 | 14.6904 0 1 0 36 0 1 10 0 1 0 1 253 | 0 0 1 0 22 0 1 11 0 1 0 1 254 | 3.4181 0 1 0 25 0 0 12 0 1 0 1 255 | 11.1973 0 1 0 27 0 1 11 0 1 0 1 256 | 0 0 1 0 29 0 1 8 0 1 0 1 257 | 0 0 1 0 24 0 0 12 0 1 0 1 258 | 0 0 1 0 22 0 1 10 0 1 0 1 259 | 1.45569 0 1 0 24 0 1 7 0 1 0 1 260 | 1.89094 0 1 0 29 0 0 12 0 1 0 1 261 | 4.48562 0 0 1 25 1 1 11 0 1 0 1 262 | 13.6134 0 1 0 30 0 0 12 0 1 0 1 263 | 1.39051 0 1 0 22 0 1 8 0 1 0 1 264 | 5.8438 0 1 0 55 0 1 3 0 1 0 1 265 | 8.598519 0 1 0 20 0 1 10 0 1 0 1 266 | 2.9202 0 1 0 34 0 1 11 0 1 0 1 267 | 0 0 1 0 22 0 0 12 0 1 0 1 268 | 6.73532 0 0 1 32 1 0 12 0 1 0 1 269 | 0 0 1 0 31 0 1 10 0 1 0 1 270 | 0 0 1 0 18 0 1 9 0 1 0 1 271 | 0 0 0 1 50 0 1 10 0 1 0 1 272 | .0447555 0 1 0 25 1 1 11 0 1 0 1 273 | 0 0 1 0 23 1 1 10 0 1 0 1 274 | 0 0 1 0 38 0 1 10 0 1 0 1 275 | 3.70181 0 1 0 25 1 1 10 0 1 0 1 276 | 6.93034 0 1 0 42 0 1 10 0 1 0 1 277 | 3.7958 0 1 0 39 1 0 12 0 1 0 1 278 | 5.19325 0 1 0 34 1 0 13 0 1 0 1 279 | 2.19353 0 1 0 24 0 1 7 0 1 0 1 280 | 11.1205 0 1 0 32 0 1 11 0 1 0 1 281 | 7.60952 0 1 0 27 0 0 13 0 1 0 1 282 | 2.16903 0 1 0 26 0 1 10 0 1 0 1 283 | 0 0 1 0 44 0 1 11 0 1 0 1 284 | 1.26423 0 1 0 25 0 1 11 0 1 0 1 285 | 0 0 1 0 25 0 0 12 0 1 0 1 286 | 0 0 1 0 28 1 0 12 0 1 0 1 287 | 0 0 1 0 32 0 1 10 0 1 0 1 288 | 0 0 1 0 22 0 1 10 0 1 0 1 289 | 5.71264 0 1 0 19 0 1 9 0 1 0 1 290 | 0 0 1 0 31 1 1 10 0 1 0 1 291 | 0 0 1 0 23 0 1 11 0 1 0 1 292 | 0 0 1 0 33 0 1 11 0 1 0 1 293 | 1.18488 0 1 0 27 0 1 10 0 1 0 1 294 | 10.2259 0 1 0 29 1 1 11 0 1 0 1 295 | 0 0 1 0 23 0 1 10 0 1 0 1 296 | 4.71537 0 1 0 25 1 1 9 0 1 0 1 297 | .28979 0 1 0 25 0 1 10 0 1 0 1 298 | 0 0 1 0 24 0 1 10 0 1 0 1 299 | 8.19042 0 0 0 28 0 1 8 0 1 0 1 300 | 4.81305 0 1 0 26 0 1 6 0 1 0 1 301 | 7.34468 0 1 0 30 1 0 14 0 1 0 1 302 | 0 0 1 0 25 1 1 10 0 1 0 1 303 | 0 0 1 0 29 1 1 11 0 1 0 1 304 | 0 0 1 0 25 1 0 12 0 1 0 1 305 | 0 0 1 0 28 0 0 13 0 1 0 1 306 | 4.35091 0 1 0 23 0 1 11 0 1 0 1 307 | 7.81252 0 1 0 54 0 1 11 0 1 0 1 308 | 0 0 0 1 33 0 1 5 0 1 0 1 309 | 3.64466 0 1 0 20 0 1 8 0 1 0 1 310 | 4.8448 0 1 0 45 0 1 9 0 1 0 1 311 | 0 0 1 0 39 0 1 6 0 1 0 1 312 | 0 0 1 0 26 0 0 12 0 1 0 1 313 | 0 0 1 0 23 0 1 10 0 1 0 1 314 | 14.7929 0 0 0 27 0 0 12 0 1 0 1 315 | 0 0 0 1 33 1 1 9 0 1 0 1 316 | 0 0 1 0 25 1 1 10 0 1 0 1 317 | 3.7467 0 1 0 23 0 1 8 0 1 0 1 318 | 1.56815 0 1 0 18 0 1 8 0 1 0 1 319 | 7.01044 0 1 0 17 0 1 8 0 1 0 1 320 | 3.81168 0 0 1 19 0 1 9 0 1 0 1 321 | 10.7986 0 0 1 18 0 1 8 0 1 0 1 322 | 4.65727 0 1 0 18 0 1 11 0 1 0 1 323 | 8.55153 0 1 0 17 0 1 11 0 1 0 1 324 | 4.30988 0 1 0 19 0 1 10 0 1 0 1 325 | 5.2864 0 1 0 19 0 1 10 0 1 0 1 326 | 12.4862 0 1 0 18 0 1 9 0 1 0 1 327 | 10.8774 0 1 0 18 0 1 9 0 1 0 1 328 | .202285 0 1 0 18 0 1 10 0 1 0 1 329 | 2.65771 0 1 0 17 0 1 10 0 1 0 1 330 | 4.13258 0 1 0 18 0 1 7 0 1 0 1 331 | 11.3031 0 1 0 18 0 1 11 0 1 0 1 332 | 0 0 0 1 19 0 1 10 0 1 0 1 333 | 0 0 1 0 18 0 1 9 0 1 0 1 334 | 0 0 1 0 17 0 1 10 0 1 0 1 335 | 2.18943 0 1 0 17 0 1 10 0 1 0 1 336 | 0 0 1 0 19 0 1 11 0 1 0 1 337 | 10.211 0 0 0 17 0 1 8 0 1 0 1 338 | 11.0481 0 1 0 18 0 1 10 0 1 0 1 339 | 0 0 1 
0 18 0 1 9 0 1 0 1 340 | 8.99387 0 1 0 17 0 1 8 0 1 0 1 341 | 5.0718 0 0 1 19 0 1 6 0 1 0 1 342 | 3.19401 0 0 0 19 0 1 10 0 1 0 1 343 | 0 0 0 0 17 0 1 11 0 1 0 1 344 | 5.19309 0 1 0 20 0 1 9 0 1 0 1 345 | 0 0 1 0 17 0 1 9 0 1 0 1 346 | .275566 0 1 0 17 0 1 10 0 1 0 1 347 | 3.5907 0 1 0 17 0 1 9 0 1 0 1 348 | 0 0 1 0 19 0 1 11 0 1 0 1 349 | 12.7977 0 1 0 19 1 1 10 0 1 0 1 350 | 2.03591 0 1 0 20 0 1 9 0 1 0 1 351 | 2.38968 0 1 0 18 0 1 9 0 1 0 1 352 | 0 0 1 0 18 0 1 11 0 1 0 1 353 | 8.46928 0 0 1 17 0 1 10 0 1 0 1 354 | 0 0 1 0 19 0 1 11 0 1 0 1 355 | 1.14339 0 1 0 17 0 1 10 0 1 0 1 356 | 5.11481 0 0 1 17 0 1 9 0 1 0 1 357 | .781224 0 1 0 18 0 1 10 0 1 0 1 358 | 3.34322 0 1 0 21 0 1 9 .591499 0 0 1 359 | 9.602441 0 1 0 18 0 1 10 1.56325 0 0 1 360 | 0 0 1 0 19 0 1 11 1.62662 0 0 1 361 | 16.4616 0 1 0 24 0 1 9 2.7885 0 0 1 362 | 6.77162 0 0 1 28 1 1 11 3.47295 0 0 1 363 | 0 0 1 0 25 0 1 11 5.28125 0 0 1 364 | 11.0116 0 1 0 21 0 1 7 33.8 0 0 1 365 | 0 0 1 0 39 0 1 11 0 1 .0836896 0 366 | 0 0 1 0 36 0 0 12 0 1 .142397 0 367 | 0 0 1 0 24 0 0 12 0 1 .159885 0 368 | 4.25113 0 1 0 17 0 1 11 .989268 0 .165208 0 369 | 2.89167 0 1 0 18 0 1 10 .960427 0 .240107 0 370 | 5.51437 0 1 0 18 0 1 10 0 1 .273553 0 371 | 4.8589 0 1 0 28 0 1 10 1.47129 0 .367823 0 372 | 4.81258 0 0 0 27 0 0 13 5.21431 0 .474502 0 373 | 0 0 0 0 31 0 0 12 0 1 .494643 0 374 | .604199 0 1 0 22 0 1 9 0 1 .506408 0 375 | 14.5279 0 1 0 31 0 1 10 0 1 .520446 0 376 | 0 0 1 0 26 1 1 10 6.14037 0 .558773 0 377 | 7.3005 0 0 0 18 0 1 9 0 1 .559596 0 378 | 0 0 1 0 23 0 1 11 6.38231 0 .58079 0 379 | 4.15992 0 1 0 20 0 0 12 0 1 .591815 0 380 | 0 0 1 0 19 0 1 10 0 1 .604154 0 381 | 5.49759 0 1 0 18 0 1 11 1.0647 0 .645272 0 382 | 0 0 1 0 17 0 1 10 0 1 .664569 0 383 | 0 0 1 0 27 1 0 12 0 1 .75239 0 384 | 0 0 1 0 27 0 1 11 3.06519 0 .766299 0 385 | 16.477 0 0 0 28 0 0 12 0 1 .803343 0 386 | 0 0 1 0 28 0 1 11 2.43195 0 .86348 0 387 | 39.4835 0 1 0 21 0 1 10 6.66106 0 1.16236 0 388 | 11.3063 0 0 1 17 0 1 10 4.90512 0 1.1689 0 389 | 6.67202 0 1 0 26 0 1 11 4.69996 0 1.17499 0 390 | 9.378651 0 1 0 29 0 1 9 0 1 1.20382 0 391 | 5.08899 0 0 1 17 0 1 10 1.20361 0 1.23963 0 392 | 2.63929 0 1 0 22 0 1 11 7.91413 0 1.32166 0 393 | 9.4959 0 1 0 24 0 1 11 0 1 1.32799 0 394 | 20.8931 0 1 0 20 0 0 12 .557699 0 1.37147 0 395 | 0 0 1 0 18 0 0 12 0 1 1.40551 0 396 | 10.3617 0 0 0 24 0 1 11 2.66973 0 1.46838 0 397 | 1.7402 0 0 0 21 0 1 9 2.98841 0 1.57717 0 398 | 0 0 1 0 30 0 1 8 0 1 1.70666 0 399 | 0 0 1 0 31 0 1 11 17.7119 0 1.72645 0 400 | 6.35419 0 0 1 17 0 1 10 1.44268 0 1.73456 0 401 | 7.171 0 1 0 19 0 1 9 8.40963 0 1.77809 0 402 | 5.57355 0 1 0 23 0 1 11 0 1 1.89602 0 403 | .439688 0 1 0 22 0 0 12 4.38002 0 2.00368 0 404 | 16.97 0 1 0 29 0 0 12 22.8594 0 2.08021 0 405 | 5.34402 0 1 0 22 0 1 10 0 1 2.17496 0 406 | 2.72532 0 1 0 29 0 0 13 .718249 0 2.26558 0 407 | 9.772281 0 1 0 19 0 1 11 .721341 0 2.44559 0 408 | 0 0 1 0 17 0 1 9 0 1 2.59527 0 409 | 0 0 1 0 18 0 1 10 1.71651 0 2.68213 0 410 | 1.72091 0 1 0 19 0 0 12 8.417 0 2.8142 0 411 | 0 0 1 0 20 0 1 6 6.00688 0 2.85061 0 412 | 18.8599 0 1 0 33 0 1 11 10.5238 0 2.89982 0 413 | 1.32454 0 0 1 36 0 1 11 5.44373 0 3.06388 0 414 | .284658 0 1 0 25 0 1 11 15.21 0 3.07273 0 415 | 11.1959 0 1 0 19 0 1 11 3.50401 0 3.28568 0 416 | 0 0 1 0 23 0 1 8 7.72428 0 3.40306 0 417 | 0 0 1 0 17 0 1 11 4.08073 0 3.79603 0 418 | 7.56527 0 1 0 43 0 1 10 2.50287 0 4.12844 0 419 | 0 0 1 0 26 1 1 11 0 1 4.18473 0 420 | 0 0 1 0 27 1 1 11 0 1 4.49188 0 421 | 0 0 1 0 19 0 1 11 6.33749 0 4.50306 0 422 | 4.97459 0 1 0 28 1 1 11 8.593161 0 
5.3939 0 423 | 12.78 0 1 0 28 1 0 12 10.5851 0 5.55146 0 424 | 3.52358 0 1 0 26 0 1 8 1.12629 0 5.5626 0 425 | 0 0 1 0 31 0 0 12 0 1 5.61391 0 426 | 10.2748 0 1 0 23 0 1 11 7.61736 0 5.71641 0 427 | 4.77972 0 1 0 20 0 0 12 7.18249 0 6.00473 0 428 | 16.9882 0 1 0 28 1 1 10 8.293349 0 6.44948 0 429 | .499257 0 1 0 39 1 0 12 19.7853 0 6.60814 0 430 | 3.08358 0 1 0 21 0 1 8 39.5707 0 6.6083 0 431 | 3.70872 0 1 0 22 0 1 11 8.81007 0 6.97448 0 432 | 7.65922 0 1 0 20 0 1 11 8.00916 0 7.66688 0 433 | 20.8578 0 0 1 21 0 1 11 2.99253 0 8.920469 0 434 | 7.07818 0 0 1 23 0 0 12 5.7217 0 8.96068 0 435 | 0 0 1 0 29 0 1 9 9.26894 0 9.16069 0 436 | 1.23984 0 1 0 28 1 1 9 10.2224 0 9.21045 0 437 | 3.9828 0 1 0 30 1 1 11 0 1 9.31194 0 438 | 0 0 1 0 25 1 1 10 13.52 0 9.319441 0 439 | 0 0 1 0 28 1 1 11 .824389 0 10.0339 0 440 | 7.09492 0 0 0 22 0 1 10 27.8644 0 10.5987 0 441 | 12.3593 0 1 0 44 1 1 9 12.2608 0 10.8572 0 442 | 0 0 1 0 21 0 1 9 31.8864 0 12.3572 0 443 | 0 0 1 0 28 0 1 11 17.4915 0 13.3713 0 444 | 16.9003 0 0 1 29 0 1 9 9.59431 0 16.3412 0 445 | 7.34396 0 1 0 25 1 1 9 24.7316 0 16.9466 0 446 | 5.4488 0 0 0 22 1 1 10 25.7209 0 23.032 0 447 | -------------------------------------------------------------------------------- /causalinference/utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import norm, logistic 3 | 4 | from os import path 5 | lalonde_file = path.join(path.dirname(__file__), 'lalonde_data.txt') 6 | vignette_file = path.join(path.dirname(__file__), 'vignette_data.txt') 7 | 8 | 9 | def convert_to_formatting(entry_types): 10 | 11 | for entry_type in entry_types: 12 | if entry_type == 'string': 13 | yield 's' 14 | elif entry_type == 'float': 15 | yield '.3f' 16 | elif entry_type == 'integer': 17 | yield '.0f' 18 | 19 | 20 | def add_row(entries, entry_types, col_spans, width): 21 | 22 | #Convert an array of string or numeric entries into a string with 23 | #even formatting and spacing. 24 | 25 | vis_cols = len(col_spans) 26 | invis_cols = sum(col_spans) 27 | 28 | char_per_col = width // invis_cols 29 | first_col_padding = width % invis_cols 30 | 31 | char_spans = [char_per_col * col_span for col_span in col_spans] 32 | char_spans[0] += first_col_padding 33 | formatting = convert_to_formatting(entry_types) 34 | line = ['%'+str(s)+f for (s,f) in zip(char_spans,formatting)] 35 | 36 | return (''.join(line) % tuple(entries)) + '\n' 37 | 38 | 39 | def add_line(width): 40 | 41 | return '-'*width + '\n' 42 | 43 | 44 | def gen_reg_entries(varname, coef, se): 45 | 46 | z = coef / se 47 | p = 2*(1 - norm.cdf(np.abs(z))) 48 | lw = coef - 1.96*se 49 | up = coef + 1.96*se 50 | 51 | return (varname, coef, se, z, p, lw, up) 52 | 53 | 54 | def random_data(N=5000, K=3, unobservables=False, **kwargs): 55 | 56 | """ 57 | Function that generates data according to one of two simple models that 58 | satisfies the unconfoundedness assumption. 59 | 60 | The covariates and error terms are generated according to 61 | X ~ N(mu, Sigma), epsilon ~ N(0, Gamma). 62 | 63 | The counterfactual outcomes are generated by 64 | Y0 = X*beta + epsilon_0, 65 | Y1 = delta + X*(beta+theta) + epsilon_1. 66 | 67 | Selection is done according to the following propensity score function: 68 | P(D=1|X) = Lambda(X*beta). 69 | 70 | Here Lambda is the standard logistic CDF. 71 | 72 | Parameters 73 | ---------- 74 | N: int 75 | Number of units to draw. Defaults to 5000. 76 | K: int 77 | Number of covariates. Defaults to 3. 
78 | unobservables: bool 79 | Returns potential outcomes and true propensity score 80 | in addition to observed outcome and covariates if True. 81 | Defaults to False. 82 | mu, Sigma, Gamma, beta, delta, theta: NumPy ndarrays, optional 83 | Parameter values appearing in data generating process. 84 | 85 | Returns 86 | ------- 87 | tuple 88 | A tuple in the form of (Y, D, X) or (Y, D, X, Y0, Y1, pscore) of 89 | observed outcomes, treatment indicators, covariate matrix, 90 | potential outcomes, and true propensity scores. 91 | """ 92 | 93 | mu = kwargs.get('mu', np.zeros(K)) 94 | beta = kwargs.get('beta', np.ones(K)) 95 | theta = kwargs.get('theta', np.ones(K)) 96 | delta = kwargs.get('delta', 3) 97 | Sigma = kwargs.get('Sigma', np.identity(K)) 98 | Gamma = kwargs.get('Gamma', np.identity(2)) 99 | 100 | X = np.random.multivariate_normal(mean=mu, cov=Sigma, size=N) 101 | Xbeta = X.dot(beta) 102 | pscore = logistic.cdf(Xbeta) 103 | D = np.array([np.random.binomial(1, p, size=1) for p in pscore]).flatten() 104 | 105 | epsilon = np.random.multivariate_normal(mean=np.zeros(2), cov=Gamma, size=N) 106 | Y0 = Xbeta + epsilon[:,0] 107 | Y1 = delta + X.dot(beta+theta) + epsilon[:,1] 108 | Y = (1-D)*Y0 + D*Y1 109 | 110 | if unobservables: 111 | return Y, D, X, Y0, Y1, pscore 112 | else: 113 | return Y, D, X 114 | 115 | 116 | def read_tsv(filepath): 117 | 118 | data = np.loadtxt(filepath, delimiter='\t', skiprows=1) 119 | Y = data[:,0] 120 | D = data[:,1] 121 | X = data[:,2:] 122 | 123 | return Y, D, X 124 | 125 | 126 | def vignette_data(): 127 | 128 | return read_tsv(vignette_file) 129 | 130 | 131 | def lalonde_data(): 132 | 133 | return read_tsv(lalonde_file) 134 | 135 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp." 92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Causalinference.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Causalinference.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. 
You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Causalinference" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Causalinference" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 
201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% block footer %} 4 | {{ super() }} 5 | 9 | 14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /docs/causalinference.core.rst: -------------------------------------------------------------------------------- 1 | causalinference.core package 2 | ============================ 3 | 4 | causalinference.core.data module 5 | -------------------------------- 6 | 7 | .. automodule:: causalinference.core.data 8 | :members: 9 | :show-inheritance: 10 | 11 | causalinference.core.propensity module 12 | -------------------------------------- 13 | 14 | .. automodule:: causalinference.core.propensity 15 | :members: 16 | :show-inheritance: 17 | 18 | causalinference.core.strata module 19 | ---------------------------------- 20 | 21 | .. automodule:: causalinference.core.strata 22 | :members: 23 | :show-inheritance: 24 | 25 | causalinference.core.summary module 26 | ----------------------------------- 27 | 28 | .. automodule:: causalinference.core.summary 29 | :members: 30 | :show-inheritance: 31 | 32 | -------------------------------------------------------------------------------- /docs/causalinference.estimators.rst: -------------------------------------------------------------------------------- 1 | causalinference.estimators package 2 | ================================== 3 | 4 | causalinference.estimators.base module 5 | -------------------------------------- 6 | 7 | .. automodule:: causalinference.estimators.base 8 | :members: 9 | :show-inheritance: 10 | 11 | causalinference.estimators.blocking module 12 | ------------------------------------------ 13 | 14 | .. automodule:: causalinference.estimators.blocking 15 | :members: 16 | :show-inheritance: 17 | 18 | causalinference.estimators.matching module 19 | ------------------------------------------ 20 | 21 | .. automodule:: causalinference.estimators.matching 22 | :members: 23 | :show-inheritance: 24 | 25 | causalinference.estimators.ols module 26 | ------------------------------------- 27 | 28 | .. automodule:: causalinference.estimators.ols 29 | :members: 30 | :show-inheritance: 31 | 32 | causalinference.estimators.weighting module 33 | ------------------------------------------- 34 | 35 | .. 
automodule:: causalinference.estimators.weighting 36 | :members: 37 | :show-inheritance: 38 | 39 | -------------------------------------------------------------------------------- /docs/causalinference.rst: -------------------------------------------------------------------------------- 1 | causalinference package 2 | ======================= 3 | 4 | This package contains the ``CausalModel`` class, the main interface for assessing the tools of *Causalinference*. 5 | 6 | CausalModel 7 | ----------- 8 | 9 | .. automodule:: causalinference.causal 10 | :members: 11 | :show-inheritance: 12 | 13 | Subpackages 14 | ----------- 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | 19 | causalinference.core 20 | causalinference.estimators 21 | causalinference.utils 22 | 23 | -------------------------------------------------------------------------------- /docs/causalinference.utils.rst: -------------------------------------------------------------------------------- 1 | causalinference.utils package 2 | ============================= 3 | 4 | causalinference.utils.tools module 5 | ---------------------------------- 6 | 7 | .. automodule:: causalinference.utils.tools 8 | :members: 9 | :show-inheritance: 10 | 11 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Causalinference documentation build configuration file, created by 4 | # sphinx-quickstart on Sat May 21 18:45:25 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | sys.path.insert(0, os.path.abspath('../')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.autosummary', 34 | 'numpydoc', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix(es) of source filenames. 41 | # You can specify multiple suffix as a list of string: 42 | # source_suffix = ['.rst', '.md'] 43 | source_suffix = '.rst' 44 | 45 | # The encoding of source files. 46 | #source_encoding = 'utf-8-sig' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = u'Causalinference' 53 | copyright = u'2016, Laurence Wong' 54 | author = u'Laurence Wong' 55 | 56 | # The version info for the project you're documenting, acts as replacement for 57 | # |version| and |release|, also used in various other places throughout the 58 | # built documents. 59 | # 60 | # The short X.Y version. 
61 | version = u'0.1.3' 62 | # The full version, including alpha/beta/rc tags. 63 | release = u'0.1.3' 64 | 65 | # The language for content autogenerated by Sphinx. Refer to documentation 66 | # for a list of supported languages. 67 | # 68 | # This is also used if you do content translation via gettext catalogs. 69 | # Usually you set "language" from the command line for these cases. 70 | language = None 71 | 72 | # There are two options for replacing |today|: either, you set today to some 73 | # non-false value, then it is used: 74 | #today = '' 75 | # Else, today_fmt is used as the format for a strftime call. 76 | #today_fmt = '%B %d, %Y' 77 | 78 | # List of patterns, relative to source directory, that match files and 79 | # directories to ignore when looking for source files. 80 | # This patterns also effect to html_static_path and html_extra_path 81 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 82 | 83 | # The reST default role (used for this markup: `text`) to use for all 84 | # documents. 85 | #default_role = None 86 | 87 | # If true, '()' will be appended to :func: etc. cross-reference text. 88 | #add_function_parentheses = True 89 | 90 | # If true, the current module name will be prepended to all description 91 | # unit titles (such as .. function::). 92 | #add_module_names = True 93 | 94 | # If true, sectionauthor and moduleauthor directives will be shown in the 95 | # output. They are ignored by default. 96 | #show_authors = False 97 | 98 | # The name of the Pygments (syntax highlighting) style to use. 99 | pygments_style = 'sphinx' 100 | 101 | # A list of ignored prefixes for module index sorting. 102 | #modindex_common_prefix = [] 103 | 104 | # If true, keep warnings as "system message" paragraphs in the built documents. 105 | #keep_warnings = False 106 | 107 | # If true, `todo` and `todoList` produce output, else they produce nothing. 108 | todo_include_todos = False 109 | 110 | 111 | # -- Options for HTML output ---------------------------------------------- 112 | 113 | # The theme to use for HTML and HTML Help pages. See the documentation for 114 | # a list of builtin themes. 115 | html_theme = 'classic' 116 | 117 | # Theme options are theme-specific and customize the look and feel of a theme 118 | # further. For a list of options available for each theme, see the 119 | # documentation. 120 | #html_theme_options = {} 121 | 122 | # Add any paths that contain custom themes here, relative to this directory. 123 | #html_theme_path = [] 124 | 125 | # The name for this set of Sphinx documents. 126 | # " v documentation" by default. 127 | #html_title = u'Causalinference v0.1.3' 128 | 129 | # A shorter title for the navigation bar. Default is the same as html_title. 130 | #html_short_title = None 131 | 132 | # The name of an image file (relative to this directory) to place at the top 133 | # of the sidebar. 134 | #html_logo = None 135 | 136 | # The name of an image file (relative to this directory) to use as a favicon of 137 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 138 | # pixels large. 139 | html_favicon = 'favicon.png' 140 | 141 | # Add any paths that contain custom static files (such as style sheets) here, 142 | # relative to this directory. They are copied after the builtin static files, 143 | # so a file named "default.css" will overwrite the builtin "default.css". 
144 | html_static_path = ['_static'] 145 | 146 | # Add any extra paths that contain custom files (such as robots.txt or 147 | # .htaccess) here, relative to this directory. These files are copied 148 | # directly to the root of the documentation. 149 | #html_extra_path = [] 150 | 151 | # If not None, a 'Last updated on:' timestamp is inserted at every page 152 | # bottom, using the given strftime format. 153 | # The empty string is equivalent to '%b %d, %Y'. 154 | #html_last_updated_fmt = None 155 | 156 | # If true, SmartyPants will be used to convert quotes and dashes to 157 | # typographically correct entities. 158 | #html_use_smartypants = True 159 | 160 | # Custom sidebar templates, maps document names to template names. 161 | #html_sidebars = {} 162 | 163 | # Additional templates that should be rendered to pages, maps page names to 164 | # template names. 165 | #html_additional_pages = {} 166 | 167 | # If false, no module index is generated. 168 | html_domain_indices = False 169 | 170 | # If false, no index is generated. 171 | #html_use_index = True 172 | 173 | # If true, the index is split into individual pages for each letter. 174 | #html_split_index = False 175 | 176 | # If true, links to the reST sources are added to the pages. 177 | html_show_sourcelink = False 178 | 179 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 180 | html_show_sphinx = False 181 | 182 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 183 | #html_show_copyright = True 184 | 185 | # If true, an OpenSearch description file will be output, and all pages will 186 | # contain a tag referring to it. The value of this option must be the 187 | # base URL from which the finished HTML is served. 188 | #html_use_opensearch = '' 189 | 190 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 191 | #html_file_suffix = None 192 | 193 | # Language to be used for generating the HTML full-text search index. 194 | # Sphinx supports the following languages: 195 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 196 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 197 | #html_search_language = 'en' 198 | 199 | # A dictionary with options for the search language support, empty by default. 200 | # 'ja' uses this config value. 201 | # 'zh' user can custom change `jieba` dictionary path. 202 | #html_search_options = {'type': 'default'} 203 | 204 | # The name of a javascript file (relative to the configuration directory) that 205 | # implements a search results scorer. If empty, the default will be used. 206 | #html_search_scorer = 'scorer.js' 207 | 208 | # Output file base name for HTML help builder. 209 | htmlhelp_basename = 'Causalinferencedoc' 210 | 211 | # -- Options for LaTeX output --------------------------------------------- 212 | 213 | latex_elements = { 214 | # The paper size ('letterpaper' or 'a4paper'). 215 | #'papersize': 'letterpaper', 216 | 217 | # The font size ('10pt', '11pt' or '12pt'). 218 | #'pointsize': '10pt', 219 | 220 | # Additional stuff for the LaTeX preamble. 221 | #'preamble': '', 222 | 223 | # Latex figure (float) alignment 224 | #'figure_align': 'htbp', 225 | } 226 | 227 | # Grouping the document tree into LaTeX files. List of tuples 228 | # (source start file, target name, title, 229 | # author, documentclass [howto, manual, or own class]). 
230 | latex_documents = [ 231 | (master_doc, 'Causalinference.tex', u'Causalinference Documentation', 232 | u'Laurence Wong', 'manual'), 233 | ] 234 | 235 | # The name of an image file (relative to this directory) to place at the top of 236 | # the title page. 237 | #latex_logo = None 238 | 239 | # For "manual" documents, if this is true, then toplevel headings are parts, 240 | # not chapters. 241 | #latex_use_parts = False 242 | 243 | # If true, show page references after internal links. 244 | #latex_show_pagerefs = False 245 | 246 | # If true, show URL addresses after external links. 247 | #latex_show_urls = False 248 | 249 | # Documents to append as an appendix to all manuals. 250 | #latex_appendices = [] 251 | 252 | # If false, no module index is generated. 253 | #latex_domain_indices = True 254 | 255 | 256 | # -- Options for manual page output --------------------------------------- 257 | 258 | # One entry per manual page. List of tuples 259 | # (source start file, name, description, authors, manual section). 260 | man_pages = [ 261 | (master_doc, 'causalinference', u'Causalinference Documentation', 262 | [author], 1) 263 | ] 264 | 265 | # If true, show URL addresses after external links. 266 | #man_show_urls = False 267 | 268 | 269 | # -- Options for Texinfo output ------------------------------------------- 270 | 271 | # Grouping the document tree into Texinfo files. List of tuples 272 | # (source start file, target name, title, author, 273 | # dir menu entry, description, category) 274 | texinfo_documents = [ 275 | (master_doc, 'Causalinference', u'Causalinference Documentation', 276 | author, 'Causalinference', 'One line description of project.', 277 | 'Miscellaneous'), 278 | ] 279 | 280 | # Documents to append as an appendix to all manuals. 281 | #texinfo_appendices = [] 282 | 283 | # If false, no module index is generated. 284 | #texinfo_domain_indices = True 285 | 286 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 287 | #texinfo_show_urls = 'footnote' 288 | 289 | # If true, do not generate a @detailmenu in the "Top" node's menu. 290 | #texinfo_no_detailmenu = False 291 | 292 | # -- Custom configuration ------------------------------------------------ 293 | 294 | autodoc_member_order = 'bysource' 295 | numpydoc_show_class_members = False 296 | -------------------------------------------------------------------------------- /docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laurencium/Causalinference/630e8fb195754a720da41791b725d3dadabfb257/docs/favicon.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Causalinference documentation master file, created by 2 | sphinx-quickstart on Fri May 20 18:53:32 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Causal Inference in Python 7 | ========================== 8 | 9 | *Causal Inference in Python*, or *Causalinference* in short, is a software package that implements various statistical and econometric methods used in the field variously known as Causal Inference, Program Evaluation, or Treatment Effect Analysis. 10 | 11 | Work on *Causalinference* started in 2014 by Laurence Wong as a personal side project. It is distributed under the 3-Clause BSD license. 
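As a preview of the workflow covered in the rest of this guide, a typical analysis session might look like the following sketch. It is illustrative only: the method names (``est_propensity_s``, ``trim_s``, ``stratify_s``, ``est_via_matching``) and the ``estimates`` attribute follow the package's vignette and are assumptions here rather than guarantees, so consult ``help(causal)`` for the authoritative listing: ::

    >>> from causalinference import CausalModel
    >>> from causalinference.utils import lalonde_data
    >>> Y, D, X = lalonde_data()       # outcome, treatment indicator, covariate matrix
    >>> causal = CausalModel(Y, D, X)
    >>> causal.est_propensity_s()      # estimate the propensity score
    >>> causal.trim_s()                # drop units with extreme propensity scores
    >>> causal.stratify_s()            # subclassify the sample on the propensity score
    >>> causal.est_via_matching()      # estimate treatment effects by matching
    >>> print(causal.estimates)        # ATE, ATT, and ATC with standard errors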
12 | 13 | Important Links 14 | --------------- 15 | 16 | The official website for *Causalinference* is 17 | 18 | https://causalinferenceinpython.org 19 | 20 | The most current development version is hosted on GitHub at 21 | 22 | https://github.com/laurencium/causalinference 23 | 24 | Package source and binary distribution files are available from PyPi at 25 | 26 | https://pypi.python.org/pypi/causalinference 27 | 28 | For an overview of the main features and uses of *Causalinference*, please refer to 29 | 30 | https://github.com/laurencium/causalinference/blob/master/docs/tex/vignette.pdf 31 | 32 | A blog dedicated to providing a more detailed walkthrough of *Causalinference* and the econometric theory behind it can be found at 33 | 34 | https://laurencewong.com/software/ 35 | 36 | Main Features 37 | ------------- 38 | 39 | * Assessment of overlap in covariate distributions 40 | * Estimation of propensity score 41 | * Improvement of covariate balance through trimming 42 | * Subclassification on propensity score 43 | * Estimation of treatment effects via matching, blocking, weighting, and least squares 44 | 45 | Dependencies 46 | ------------ 47 | 48 | * NumPy: 1.8.2 or higher 49 | * SciPy: 0.13.3 or higher 50 | 51 | Installation 52 | ------------ 53 | 54 | *Causalinference* can be installed using ``pip``: :: 55 | 56 | $ pip install causalinference 57 | 58 | For help on setting up Pip, NumPy, and SciPy on Macs, check out this excellent `guide `_. 59 | 60 | Minimal Example 61 | --------------- 62 | 63 | The following illustrates how to create an instance of CausalModel: :: 64 | 65 | >>> from causalinference import CausalModel 66 | >>> from causalinference.utils import random_data 67 | >>> Y, D, X = random_data() 68 | >>> causal = CausalModel(Y, D, X) 69 | 70 | Invoking ``help`` on ``causal`` at this point should return a comprehensive listing of all the causal analysis tools available in *Causalinference*. 71 | 72 | Detailed Documentation 73 | ---------------------- 74 | .. toctree:: 75 | :maxdepth: 3 76 | 77 | causalinference 78 | 79 | Site Navigation 80 | --------------- 81 | 82 | * :ref:`genindex` 83 | * :ref:`search` 84 | 85 | -------------------------------------------------------------------------------- /docs/tex/references.bib: -------------------------------------------------------------------------------- 1 | % This file was created with JabRef 2.7b. 2 | % Encoding: Cp1252 3 | 4 | @BOOK{vanderVaart.1998, 5 | title = {Asymptotic {Statistics}}, 6 | publisher = {Cambridge University Press}, 7 | year = {1998}, 8 | author = {{van der Vaart}, Aad W.}, 9 | owner = {alumni}, 10 | timestamp = {2010.04.30} 11 | } 12 | 13 | @BOOK{vanderVaartWellner.1996, 14 | title = {Weak {Convergence} and {Empirical} {Processes}}, 15 | publisher = {Springer}, 16 | year = {1996}, 17 | author = {{van der Vaart}, Aad W. 
and Wellner, Jon A.}, 18 | address = {New York}, 19 | owner = {alumni}, 20 | timestamp = {2010.04.30} 21 | } 22 | 23 | @ARTICLE{AbadieAngristImbens.2002, 24 | author = {Alberto Abadie and Joshua Angrist and Guido Imbens}, 25 | title = {Instrumental variables estimates of the effect of subsidized training 26 | on the quantiles of trainee earnings}, 27 | journal = {Econometrica}, 28 | year = {2002}, 29 | volume = {70}, 30 | pages = {91-117}, 31 | owner = {Laurence}, 32 | timestamp = {2010.03.02} 33 | } 34 | 35 | @ARTICLE{AbadieDiamondHainmueller.2010, 36 | author = {Alberto Abadie and Alexis Diamond and Jens Hainmueller}, 37 | title = {Synthetic control methods for comparative case studies: estimating 38 | the effect of California's tobacco control program}, 39 | journal = {Journal of American Statistical Association}, 40 | year = {2010}, 41 | volume = {105}, 42 | pages = {493-505}, 43 | owner = {Laurence}, 44 | timestamp = {2010.01.02} 45 | } 46 | 47 | @ARTICLE{AbadieGardeazabal.2003, 48 | author = {Alberto Abadie and Javier Gardeazabal}, 49 | title = {The economic costs of conflict: a case study of the Basque country}, 50 | journal = {American Economic Review}, 51 | year = {2003}, 52 | volume = {93}, 53 | pages = {113-132}, 54 | owner = {Laurence}, 55 | timestamp = {2010.01.02} 56 | } 57 | 58 | @ARTICLE{AbadieImbens.2006, 59 | author = {Alberto Abadie and Guido Imbens}, 60 | title = {Large Sample Properties of Matching Estimators for Average Treatment 61 | Effects}, 62 | journal = {Econometrica}, 63 | year = {2006}, 64 | volume = {74}, 65 | pages = {235-267}, 66 | owner = {laurence}, 67 | timestamp = {2014.10.05} 68 | } 69 | 70 | @ARTICLE{Allcott.2011, 71 | author = {Hunt Allcott}, 72 | title = {Social {Norms} and {Energy} {Conservation}}, 73 | journal = {Journal of Public Economics}, 74 | year = {2011}, 75 | volume = {95}, 76 | pages = {1082-1095}, 77 | owner = {laurencium}, 78 | timestamp = {2013.10.18} 79 | } 80 | 81 | @BOOK{Amemiya.1985, 82 | title = {Advanced econometrics}, 83 | publisher = {Harvard University Press}, 84 | year = {1985}, 85 | author = {Takeshi Amemiya}, 86 | address = {Cambridge, MA}, 87 | owner = {Laurence}, 88 | timestamp = {2010.01.01} 89 | } 90 | 91 | @ARTICLE{Andrews.1991, 92 | author = {Donald W. K. Andrews}, 93 | title = {Heteroskedasticity and autocorrelation consistent covariance matrix 94 | estimation}, 95 | journal = {Econometrica}, 96 | year = {1991}, 97 | volume = {59}, 98 | pages = {817-858}, 99 | owner = {Laurence}, 100 | timestamp = {2010.08.26} 101 | } 102 | 103 | @ARTICLE{AngristChernozhukovFernandezVal.2006, 104 | author = {Joshua Angrist and Victor Chernozhukov and Ivan Fernandez-Val}, 105 | title = {Quantile regression under misspecification, with an application to 106 | the U.S. wage structure}, 107 | journal = {Econometrica}, 108 | year = {2006}, 109 | volume = {74}, 110 | pages = {539-563}, 111 | owner = {Laurence}, 112 | timestamp = {2010.01.02} 113 | } 114 | 115 | @ARTICLE{Angrist.1990, 116 | author = {Joshua D. Angrist}, 117 | title = {Lifetime {Earnings} and the {Vietnam} {Era} {Draft} {Lottery}: {Evidence} 118 | from {Social} {Security} {Administrative} {Records}}, 119 | journal = {American Economic Review}, 120 | year = {1990}, 121 | volume = {80}, 122 | pages = {313-336}, 123 | owner = {Laurence}, 124 | timestamp = {2010.07.14} 125 | } 126 | 127 | @ARTICLE{AngristImbensRubin.1996, 128 | author = {Joshua D. Angrist and Guido W. Imbens and Donald B. 
Rubin}, 129 | title = {Identification of {Causal} {Effects} {Using} {Instrumental} {Variables}}, 130 | journal = {Journal of the American Statistical Association}, 131 | year = {1996}, 132 | volume = {91}, 133 | pages = {444-455}, 134 | owner = {Laurence}, 135 | timestamp = {2010.02.11} 136 | } 137 | 138 | @BOOK{AngristPischke.2009, 139 | title = {Mostly harmless econometrics: an empiricist's companion}, 140 | publisher = {Princeton University Press}, 141 | year = {2009}, 142 | author = {Joshua D. Angrist and Jorn-Stefeen Pischke}, 143 | owner = {Laurence}, 144 | timestamp = {2010.01.03} 145 | } 146 | 147 | @ARTICLE{Arellano.1987, 148 | author = {Manuel Arellano}, 149 | title = {Computing robust standard errors for within-in groups estimators}, 150 | journal = {Oxford Bulletin of Economics and Statistics}, 151 | year = {1987}, 152 | volume = {49}, 153 | pages = {431-434}, 154 | owner = {Laurence}, 155 | timestamp = {2010.01.08} 156 | } 157 | 158 | @ARTICLE{AtheyImbens.2006, 159 | author = {Susan Athey and Guido W. Imbens}, 160 | title = {Identification and inference in nonlinear difference-in-differences 161 | models}, 162 | journal = {Econometrica}, 163 | year = {2006}, 164 | volume = {74}, 165 | pages = {431-497}, 166 | owner = {Laurence}, 167 | timestamp = {2010.08.12} 168 | } 169 | 170 | @ARTICLE{Bai.2009, 171 | author = {Jushan Bai}, 172 | title = {Panel data models with interactive fixed effects}, 173 | journal = {Econometrica}, 174 | year = {2009}, 175 | volume = {77}, 176 | pages = {1229-1279}, 177 | owner = {Laurence}, 178 | timestamp = {2010.01.07} 179 | } 180 | 181 | @ARTICLE{Beran.1995, 182 | author = {Rudolf Beran}, 183 | title = {Stein confidence sets and the bootstrap}, 184 | journal = {Statistica Sinica}, 185 | year = {1995}, 186 | volume = {5}, 187 | pages = {109-127}, 188 | owner = {Laurence}, 189 | timestamp = {2011.03.16} 190 | } 191 | 192 | @ARTICLE{Berry.1994, 193 | author = {Steven T. Berry}, 194 | title = {Estimating discrete-choice models of product differentiation}, 195 | journal = {RAND Journal of Economics}, 196 | year = {1994}, 197 | volume = {25}, 198 | pages = {242-262}, 199 | owner = {Laurence}, 200 | timestamp = {2010.01.01} 201 | } 202 | 203 | @ARTICLE{BerryLevinsohnPakes.1995, 204 | author = {Steven T. Berry and James Levinsohn and Ariel Pakes}, 205 | title = {Automobile prices in market equilibrium}, 206 | journal = {Econometrica}, 207 | year = {1995}, 208 | volume = {63}, 209 | pages = {841-890}, 210 | owner = {Laurence}, 211 | timestamp = {2010.01.01} 212 | } 213 | 214 | @ARTICLE{BertrandDufloMullainathan.2004, 215 | author = {Marianne Bertrand and Esther Duflo and Sendhil Mullainathan}, 216 | title = {How much should we trust differences-in-differences estimates?}, 217 | journal = {Quarterly Journal of Economics}, 218 | year = {2004}, 219 | volume = {119}, 220 | pages = {249-275}, 221 | owner = {Laurence}, 222 | timestamp = {2010.02.11} 223 | } 224 | 225 | @ARTICLE{BickelRitovTsybakov.2009, 226 | author = {Peter J. Bickel and Ya'acov Ritov and Alexandre B. 
Tsybakov}, 227 | title = {Simultaneous analysis of lasso and Dantzig selector}, 228 | journal = {Annals of Statistics}, 229 | year = {2009}, 230 | volume = {37}, 231 | pages = {1705-1732}, 232 | owner = {Laurence}, 233 | timestamp = {2011.03.16} 234 | } 235 | 236 | @BOOK{Billingsley.1995, 237 | title = {Probability and measure}, 238 | publisher = {Wiley}, 239 | year = {1995}, 240 | author = {Patrick Billingsley}, 241 | address = {New York}, 242 | edition = {3rd}, 243 | owner = {alumni}, 244 | timestamp = {2010.04.30} 245 | } 246 | 247 | @ARTICLE{BitlerGelbachHoynes.2006, 248 | author = {Marianne P. Bitler and Jonah B. Gelbach and Hilary W. Hoynes}, 249 | title = {What mean impacts miss: distributional effects of welfare reform 250 | experiments}, 251 | journal = {American Economic Review}, 252 | year = {2006}, 253 | volume = {96}, 254 | pages = {988-1012}, 255 | owner = {Laurence}, 256 | timestamp = {2010.03.06} 257 | } 258 | 259 | @BOOK{BoydVandenberghe.2004, 260 | title = {Convex {Optimization}}, 261 | publisher = {Cambridge University Press}, 262 | year = {2004}, 263 | author = {Stephen Boyd and Lieven Vandenberghe}, 264 | owner = {laurencium}, 265 | timestamp = {2013.11.14} 266 | } 267 | 268 | @BOOK{CameronTrivedi.2005, 269 | title = {Microeconometrics: methods and applications}, 270 | publisher = {Cambridge University Press}, 271 | year = {2005}, 272 | author = {A. Colin Cameron and Pravin K. Trivedi}, 273 | owner = {Laurence}, 274 | timestamp = {2010.01.01} 275 | } 276 | 277 | @BOOK{CampbellLoMacKinlay.1997, 278 | title = {The econometrics of financial markets}, 279 | publisher = {Princeton University Press}, 280 | year = {1997}, 281 | author = {John Y. Campbell and Andrew W. Lo and A. Craig MacKinlay}, 282 | owner = {Laurence}, 283 | timestamp = {2010.01.05} 284 | } 285 | 286 | @ARTICLE{CandesTao.2007, 287 | author = {Emmanuel Candes and Terence Tao}, 288 | title = {The Dantzig selector: statisical estimation when $p$ is much larger 289 | than $n$}, 290 | journal = {Annals of Statistics}, 291 | year = {2007}, 292 | volume = {35}, 293 | pages = {2313-2351}, 294 | owner = {Laurence}, 295 | timestamp = {2011.03.16} 296 | } 297 | 298 | @BOOK{CapinskiKopp.2004, 299 | title = {Measure, integral and probability}, 300 | publisher = {Springer}, 301 | year = {2004}, 302 | author = {Capi\'{n}ski, Marek and Kopp, Ekkehard}, 303 | address = {London}, 304 | edition = {2nd}, 305 | owner = {alumni}, 306 | timestamp = {2010.04.30} 307 | } 308 | 309 | @ARTICLE{CardHyslop.2005, 310 | author = {David Card and Dean R. Hyslop}, 311 | title = {Estimating the {Effects} of a {Time-limited} {Earnings} {Subsidy} 312 | for {Welfare-leavers}}, 313 | journal = {Econometrica}, 314 | year = {2005}, 315 | volume = {73}, 316 | pages = {1723-1770}, 317 | owner = {laurencium}, 318 | timestamp = {2013.10.18} 319 | } 320 | 321 | @ARTICLE{CardKrueger.1994, 322 | author = {David Card and Alan B. Krueger}, 323 | title = {Minimum Wages and Employment: A Case Study of the Fast-Food Industry 324 | in New Jersey and Pennsylvania}, 325 | journal = {American Economic Review}, 326 | year = {1994}, 327 | volume = {84}, 328 | pages = {772-793}, 329 | owner = {laurence}, 330 | timestamp = {2014.10.25} 331 | } 332 | 333 | @ARTICLE{CardMcCall.1996, 334 | author = {David Card and Brain P. McCall}, 335 | title = {Is {Workers'} {Compensation} {Covering} {Uninsured} {Medical} {Costs}? 
336 | Evidence from the {`Monday Effect'}}, 337 | journal = {Industrial and Labor Relations Review}, 338 | year = {1996}, 339 | volume = {49}, 340 | pages = {690-706}, 341 | owner = {laurencium}, 342 | timestamp = {2013.10.18} 343 | } 344 | 345 | @ARTICLE{ChernozhukovHansen.2008, 346 | author = {Victor Chernozhukov and Christian Hansen}, 347 | title = {Instrumental variable quantile regressoin: a robust inference approach}, 348 | journal = {Journal of Econometrics}, 349 | year = {2008}, 350 | volume = {142}, 351 | pages = {379-398}, 352 | owner = {Laurence}, 353 | timestamp = {2010.01.01} 354 | } 355 | 356 | @ARTICLE{ChernozhukovHansen.2006, 357 | author = {Victor Chernozhukov and Christian Hansen}, 358 | title = {Instrumental quantile regression inference for structural and treatment 359 | effect models}, 360 | journal = {Journal of Econometrics}, 361 | year = {2006}, 362 | volume = {132}, 363 | pages = {491-525}, 364 | owner = {Laurence}, 365 | timestamp = {2010.01.15} 366 | } 367 | 368 | @ARTICLE{ChernozhukovHansen.2005, 369 | author = {Victor Chernozhukov and Christian Hansen}, 370 | title = {An IV model of quantile treatment effects}, 371 | journal = {Econometrica}, 372 | year = {2005}, 373 | volume = {73}, 374 | pages = {245-261}, 375 | owner = {Laurence}, 376 | timestamp = {2010.01.01} 377 | } 378 | 379 | @ARTICLE{ChernozhukovHansen.2004, 380 | author = {Victor Chernozhukov and Christian Hansen}, 381 | title = {The effects of 401(k) participation on the wealth distribution: an 382 | instrumental quantile regression analysis}, 383 | journal = {Review of Economics and Statistics}, 384 | year = {2004}, 385 | volume = {86}, 386 | pages = {735-751}, 387 | owner = {Laurence}, 388 | timestamp = {2010.03.06} 389 | } 390 | 391 | @BOOK{CLRS.2009, 392 | title = {Introduction to Algorithms}, 393 | publisher = {MIT Press}, 394 | year = {2009}, 395 | author = {Thomas H. Cormen and Charles E. Leiserson and Ronald L. Rivest and 396 | Clifford Stein}, 397 | owner = {laurence}, 398 | timestamp = {2014.10.19} 399 | } 400 | 401 | @ARTICLE{CrumpHotzImbensMitnik.2009, 402 | author = {Crump, R. and Hotz, V. J. and Imbens, G. and Mitnik, O.}, 403 | title = {Dealing with Limited Overlap in Estimation of Average Treatment Effects}, 404 | journal = {Biometrika}, 405 | year = {2009}, 406 | volume = {96}, 407 | pages = {187-199}, 408 | owner = {laurence}, 409 | timestamp = {2015.08.30} 410 | } 411 | 412 | @BOOK{Csorgo.1983, 413 | title = {Quantile processes with statistical applications}, 414 | publisher = {SIAM}, 415 | year = {1983}, 416 | author = {Mikl\'{o}s Cs\"{o}rg\H{o}}, 417 | series = {CBMS-NSF Regional Conference Series in Applied Mathematics 42}, 418 | address = {Philadelphia}, 419 | owner = {laurence}, 420 | timestamp = {2012.10.29} 421 | } 422 | 423 | @BOOK{Davidson.1994, 424 | title = {Stochastic limit theory}, 425 | publisher = {Oxford University Press}, 426 | year = {1994}, 427 | author = {James Davidson}, 428 | owner = {alumni}, 429 | timestamp = {2010.04.30} 430 | } 431 | 432 | @BOOK{DavidsonMacKinnon.2004, 433 | title = {Econometric theory and methods}, 434 | publisher = {Oxford University Press}, 435 | year = {2004}, 436 | author = {Davidson, Russell and MacKinnon, James Gordon} 437 | } 438 | 439 | @BOOK{DavidsonMacKinnon.1993, 440 | title = {Estimation and inference in econometrics}, 441 | publisher = {Oxford University Press}, 442 | year = {1993}, 443 | author = {Davidson, Russell and MacKinnon, James Gordon} 444 | } 445 | 446 | @ARTICLE{DehejiaWahba.2002, 447 | author = {Rajeev H. 
Dehejia and Sadek Wahba}, 448 | title = {Propensity score-matching methods for nonexperimental causal studies}, 449 | journal = {Review of Economics and Statistics}, 450 | year = {2002}, 451 | volume = {84}, 452 | pages = {151-161}, 453 | owner = {Laurence}, 454 | timestamp = {2010.01.02} 455 | } 456 | 457 | @ARTICLE{DehejiaWahba.1999, 458 | author = {Rajeev H. Dehejia and Sadek Wahba}, 459 | title = {Causal {Effects} in {Nonexperimental} {Studies}: {Reevaluating} the 460 | {Evaluation} of {Training} {Programs}}, 461 | journal = {Journal of the American Statistical Association}, 462 | year = {1999}, 463 | volume = {94}, 464 | pages = {1053-1062}, 465 | owner = {Laurence}, 466 | timestamp = {2010.01.02} 467 | } 468 | 469 | @ARTICLE{DonaldHsu.2011, 470 | author = {Stephen G. Donald and Yu-Chin Hsu}, 471 | title = {Estimation and {Inference} for {Distribution} {Functions} and {Quantile} 472 | {Functions} in {Treatment} {Effect} {Models}}, 473 | journal = {Unpublished manuscript}, 474 | year = {2011}, 475 | owner = {laurencium}, 476 | timestamp = {2013.10.20} 477 | } 478 | 479 | @ARTICLE{DonaldLang.2007, 480 | author = {Stephen G. Donald and Kevin Lang}, 481 | title = {Inference with difference-in-differences and other panel data}, 482 | journal = {Review of Economics and Statistics}, 483 | year = {2007}, 484 | volume = {89}, 485 | pages = {221-233}, 486 | owner = {Laurence}, 487 | timestamp = {2010.01.19} 488 | } 489 | 490 | @ARTICLE{DonohoJohnstoneKerykyacharianPicard.1995, 491 | author = {David L. Donoho and Iain M. Johnstone and Gerard Kerkyacharian and 492 | Dominique Picard}, 493 | title = {Wavelet shrinkage: asymptopia?}, 494 | journal = {Journal of the Royal Statistical Society, Series B}, 495 | year = {1995}, 496 | volume = {57}, 497 | pages = {301-369}, 498 | owner = {Laurence}, 499 | timestamp = {2011.03.16} 500 | } 501 | 502 | @ARTICLE{Efron.1979, 503 | author = {Efron, Bradley}, 504 | title = {Bootstrapping methods: Another look at the jackknife}, 505 | journal = {Annals of Statistics}, 506 | year = {1979}, 507 | volume = {7}, 508 | pages = {1--26} 509 | } 510 | 511 | @ARTICLE{EfronHastieJohnstoneTibshirani.2004, 512 | author = {Bradley Efron and Trevor Hastie and Iain M. Johnstone and Robert 513 | Tibshirani}, 514 | title = {Least angle regression}, 515 | journal = {Annals of Statistics}, 516 | year = {2004}, 517 | volume = {32}, 518 | pages = {407-499}, 519 | owner = {Laurence}, 520 | timestamp = {2011.03.16} 521 | } 522 | 523 | @ARTICLE{EngleGranger.1987, 524 | author = {Robert F. Engle and Clive W. J. 
Granger}, 525 | title = {Co-integration and error correction: respresentation, estimation 526 | and testing}, 527 | journal = {Econometrica}, 528 | year = {1987}, 529 | volume = {55}, 530 | pages = {251-276}, 531 | owner = {Laurence}, 532 | timestamp = {2011.06.09} 533 | } 534 | 535 | @ARTICLE{FanPark.2009, 536 | author = {Yanqin Fan and Sang Soo Park}, 537 | title = {Sharp {Bounds} on the {Distribution} of {Treatment} {Effects} and 538 | {Their} {Statistical} {Inference}}, 539 | journal = {Econometric Theory}, 540 | year = {2009}, 541 | volume = {26}, 542 | pages = {1-21}, 543 | owner = {Laurence}, 544 | timestamp = {2010.02.20} 545 | } 546 | 547 | @ARTICLE{Firpo.2007, 548 | author = {Sergio Firpo}, 549 | title = {Efficient {Semiparametric} {Estimation} of {Quantile} {Treatment} 550 | {Effects}}, 551 | journal = {Econometrica}, 552 | year = {2007}, 553 | volume = {75}, 554 | pages = {259-276}, 555 | owner = {Laurence}, 556 | timestamp = {2010.01.01} 557 | } 558 | 559 | @ARTICLE{FirpoRidder.2008, 560 | author = {Firpo, Sergio and Ridder, Geert}, 561 | title = {Bounds on {Functionals} of the {Distribution} of {Treatment} {Effects}}, 562 | journal = {IEPR Working Paper}, 563 | year = {2008}, 564 | owner = {laurencium}, 565 | timestamp = {2013.11.14} 566 | } 567 | 568 | @ARTICLE{FrischWaugh.1933, 569 | author = {Frisch, Ragnar and Waugh, Frederick V.}, 570 | title = {Partial time regressions as compared with individual trends}, 571 | journal = {Econometrica}, 572 | year = {1933}, 573 | volume = {1}, 574 | pages = {387-401} 575 | } 576 | 577 | @ARTICLE{GautierSiegmannVanVuuren.2009, 578 | author = {Pieter A. Gautier and Arjen Siegmann and Aico {Van Vuuren}}, 579 | title = {Terrorism and attitudes towards minorities: the effect of the Theo 580 | van Gogh murder on house prices in Amsterdam}, 581 | journal = {Journal of Urban Economics}, 582 | year = {2009}, 583 | volume = {65}, 584 | pages = {113-126}, 585 | owner = {Laurence}, 586 | timestamp = {2011.02.20} 587 | } 588 | 589 | @BOOK{Goldberger.1991, 590 | title = {A course in econometrics}, 591 | publisher = {Harvard University Press}, 592 | year = {1991}, 593 | author = {Arthur S. Goldberger}, 594 | address = {Cambridge, MA}, 595 | owner = {Laurence}, 596 | timestamp = {2010.01.01} 597 | } 598 | 599 | @BOOK{Greene.2002, 600 | title = {Econometric analysis}, 601 | publisher = {Prentice-Hall}, 602 | year = {2002}, 603 | author = {Greene, William H.}, 604 | address = {New York}, 605 | edition = {5th} 606 | } 607 | 608 | @ARTICLE{Hahn.1998, 609 | author = {Jinyong Hahn}, 610 | title = {On the role of the propensity score in efficient semiparametric estimation 611 | of average treatment effects}, 612 | journal = {Econometrica}, 613 | year = {1998}, 614 | volume = {66}, 615 | pages = {315-331}, 616 | owner = {Laurence}, 617 | timestamp = {2010.01.02} 618 | } 619 | 620 | @ARTICLE{HahnToddvanderKlaauw.2001, 621 | author = {Hahn, Jinyong and Todd, Petra and {van der Klaauw}, Wilbert}, 622 | title = {Identification and {Estimation} of {Treatment} {Effects} with a {Regression-Discontinuity} 623 | {Design}}, 624 | journal = {Econometrica}, 625 | year = {2001}, 626 | volume = {69}, 627 | pages = {201-209}, 628 | owner = {Laurence}, 629 | timestamp = {2010.02.02} 630 | } 631 | 632 | @BOOK{Hamilton.1994, 633 | title = {Time series analysis}, 634 | publisher = {Princeton University Press}, 635 | year = {1994}, 636 | author = {Hamilton, James D.}, 637 | address = {Princeton} 638 | } 639 | 640 | @ARTICLE{HanHausman.1990, 641 | author = {Aaron Han and Jerry A. 
Hausman}, 642 | title = {Flexible parametric estimation of duration and competing risk models}, 643 | journal = {Journal of Applied Econometrics}, 644 | year = {1990}, 645 | volume = {5}, 646 | pages = {1-28}, 647 | owner = {Laurence}, 648 | timestamp = {2010.02.20} 649 | } 650 | 651 | @ARTICLE{Hansen.1982, 652 | author = {Hansen, Lars Peter}, 653 | title = {Large sample properties of generalized method of moments estimators}, 654 | journal = {Econometrica}, 655 | year = {1982}, 656 | volume = {50}, 657 | pages = {1029--1054} 658 | } 659 | 660 | @BOOK{HastieTibshiraniFriedman.2009, 661 | title = {The Elements of Statistical Learning}, 662 | publisher = {Springer}, 663 | year = {2009}, 664 | author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome}, 665 | owner = {laurence}, 666 | timestamp = {2014.10.05} 667 | } 668 | 669 | @ARTICLE{Hausman.1985, 670 | author = {Jerry A. Hausman}, 671 | title = {The econometrics of nonlinear budget sets}, 672 | journal = {Econometrica}, 673 | year = {1985}, 674 | volume = {53}, 675 | pages = {1255-1282}, 676 | owner = {Laurence}, 677 | timestamp = {2010.01.01} 678 | } 679 | 680 | @ARTICLE{HausmanTaylor.1981, 681 | author = {Jerry A. Hausman and William E. Taylor}, 682 | title = {Panel data and unobservable individual effects}, 683 | journal = {Econometrica}, 684 | year = {1981}, 685 | volume = {49}, 686 | pages = {1377-1398}, 687 | owner = {Laurence}, 688 | timestamp = {2010.01.01} 689 | } 690 | 691 | @ARTICLE{HausmanWise.1978, 692 | author = {Jerry A. Hausman and David A. Wise}, 693 | title = {A conditional probit model for qualitative choice: discrete decisions 694 | recognizing interdependence and heterogeneous preferences}, 695 | journal = {Econometrica}, 696 | year = {1978}, 697 | volume = {46}, 698 | pages = {403-426}, 699 | owner = {Laurence}, 700 | timestamp = {2010.02.18} 701 | } 702 | 703 | @BOOK{Hayashi.2000, 704 | title = {Econometrics}, 705 | publisher = {Princeton University Press}, 706 | year = {2000}, 707 | author = {Hayashi, Fumio}, 708 | address = {Princeton} 709 | } 710 | 711 | @ARTICLE{Heckman.1979, 712 | author = {James J. Heckman}, 713 | title = {Sample selection bias as a specification error}, 714 | journal = {Econometrica}, 715 | year = {1979}, 716 | volume = {47}, 717 | pages = {153-161}, 718 | owner = {Laurence}, 719 | timestamp = {2010.01.01} 720 | } 721 | 722 | @ARTICLE{Hinrichs.2011, 723 | author = {Peter Hinrichs}, 724 | title = {The effects of affirmative action bans on college enrollment, educational 725 | attainment, and the demographic composition of universities}, 726 | journal = {Review of Economics and Statistics}, 727 | year = {2011}, 728 | volume = {forthcoming}, 729 | owner = {Laurence}, 730 | timestamp = {2011.02.20} 731 | } 732 | 733 | @ARTICLE{HiranoImbens.2001, 734 | author = {Keisuke Hirano and Guido W. Imbens}, 735 | title = {Estimation of causal effects using propensity score weighting: an 736 | application to data on right heart catheterization}, 737 | journal = {Health Services and Outcomes Research Methodology}, 738 | year = {2001}, 739 | volume = {2}, 740 | pages = {259-278}, 741 | owner = {Laurence}, 742 | timestamp = {2010.07.23} 743 | } 744 | 745 | @ARTICLE{HiranoImbensRidder.2003, 746 | author = {Keisuke Hirano and Guido W. 
Imbens and Geert Ridder}, 747 | title = {Efficient {Estimation} of {Average} {Treatment} {Effects} {Using} 748 | the {Estimated} {Propensity} {Score}}, 749 | journal = {Econometrica}, 750 | year = {2003}, 751 | volume = {71}, 752 | pages = {1161-1189}, 753 | owner = {Laurence}, 754 | timestamp = {2010.01.02} 755 | } 756 | 757 | @ARTICLE{HoerlKennard.1970, 758 | author = {Arthur E. Hoerl and Robert W. Kennard}, 759 | title = {Ridge regression: biased estimation for nonorthogonal problems}, 760 | journal = {Technometrics}, 761 | year = {1970}, 762 | volume = {12}, 763 | pages = {55-67}, 764 | owner = {Laurence}, 765 | timestamp = {2011.05.10} 766 | } 767 | 768 | @ARTICLE{Holland.1986, 769 | author = {Paul W. Holland}, 770 | title = {Statistics and {Causal} {Inference}}, 771 | journal = {Journal of American Statistical Association}, 772 | year = {1986}, 773 | volume = {81}, 774 | pages = {945-960}, 775 | owner = {laurence}, 776 | timestamp = {2013.09.19} 777 | } 778 | 779 | @ARTICLE{Horowitz.1999, 780 | author = {Joel L. Horowitz}, 781 | title = {Semiparametric estimation of a proportional hazard model with unobserved 782 | heterogeneity}, 783 | journal = {Econometrica}, 784 | year = {1999}, 785 | volume = {67}, 786 | pages = {1001-1028}, 787 | owner = {Laurence}, 788 | timestamp = {2010.09.22} 789 | } 790 | 791 | @ARTICLE{Horowitz.1992, 792 | author = {Joel L. Horowitz}, 793 | title = {A smoothed maximum score estimator for the binary response model}, 794 | journal = {Econometrica}, 795 | year = {1992}, 796 | volume = {60}, 797 | pages = {505-531}, 798 | owner = {Laurence}, 799 | timestamp = {2010.09.22} 800 | } 801 | 802 | @ARTICLE{HotzImbensMortimer.2005, 803 | author = {V. Joseph Hotz and Guido W. Imbens and Julie H. Mortimer}, 804 | title = {Predicting the {Efficacy} of {Future} {Training} {Programs} {Using} 805 | {Past} {Experiences} at {Other} {Locations}}, 806 | journal = {Journal of Econometrics}, 807 | year = {2005}, 808 | volume = {125}, 809 | pages = {241-270}, 810 | owner = {laurencium}, 811 | timestamp = {2013.10.18} 812 | } 813 | 814 | @ARTICLE{HuangLiuPourahmadiLiu.2006, 815 | author = {Jianhua Z. Huang and Naiping Liu and Mohsen Pourahmadi and Linxu 816 | Liu}, 817 | title = {Covariance matrix selection and estimation via penalised normal likelihood}, 818 | journal = {Biometrika}, 819 | year = {2006}, 820 | volume = {93}, 821 | pages = {85-98}, 822 | owner = {Laurence}, 823 | timestamp = {2011.03.16} 824 | } 825 | 826 | @ARTICLE{Ichimura.1993, 827 | author = {Hidehiko Ichimura}, 828 | title = {Semiparametric least squares (SLS) and weighted SLS estimation of 829 | single-index models}, 830 | journal = {Journal of Econometrics}, 831 | year = {1993}, 832 | volume = {58}, 833 | pages = {71-120}, 834 | owner = {Laurence}, 835 | timestamp = {2010.01.01} 836 | } 837 | 838 | @ARTICLE{Imbens.2004, 839 | author = {Guido W. Imbens}, 840 | title = {Nonparametric estimation of average treatment effects under exogeneity: 841 | a review}, 842 | journal = {Review of Economics and Statistics}, 843 | year = {2004}, 844 | volume = {86}, 845 | pages = {4-29}, 846 | owner = {Laurence}, 847 | timestamp = {2010.07.23} 848 | } 849 | 850 | @ARTICLE{ImbensAngrist.1994, 851 | author = {Guido W. Imbens and Joshua D. 
Angrist}, 852 | title = {Identification and {Estimation} of {Local} {Average} {Treatment} 853 | {Effects}}, 854 | journal = {Econometrica}, 855 | year = {1994}, 856 | volume = {62}, 857 | pages = {467-475}, 858 | owner = {Laurence}, 859 | timestamp = {2010.01.01} 860 | } 861 | 862 | @ARTICLE{ImbensLancaster.1996, 863 | author = {Guido W. Imbens and Tony Lancaster}, 864 | title = {Efficient estimation and stratified sampling}, 865 | journal = {Journal of Econometrics}, 866 | year = {1996}, 867 | volume = {74}, 868 | pages = {289-318}, 869 | owner = {Laurence}, 870 | timestamp = {2010.02.08} 871 | } 872 | 873 | @ARTICLE{ImbensLemieux.2008, 874 | author = {Guido W. Imbens and Thomas Lemieux}, 875 | title = {Regression discontinuity designs: a guide to practice}, 876 | journal = {Journal of Econometrics}, 877 | year = {2008}, 878 | volume = {142}, 879 | pages = {615-635}, 880 | owner = {Laurence}, 881 | timestamp = {2010.01.15} 882 | } 883 | 884 | @ARTICLE{ImbensManski.2004, 885 | author = {Guido W. Imbens and Charles F. Manski}, 886 | title = {Confidence {Intervals} for {Partially} {Identified} {Parameters}}, 887 | journal = {Econometrica}, 888 | year = {2004}, 889 | volume = {72}, 890 | pages = {1845-1857}, 891 | owner = {laurencium}, 892 | timestamp = {2013.10.08} 893 | } 894 | 895 | @BOOK{ImbensRubin.2015, 896 | title = {Causal Inference in Statistics, Social, and Biomedical Sciences: 897 | An Introduction}, 898 | publisher = {Cambridge University Press}, 899 | year = {2015}, 900 | author = {Guido W. Imbens and Donald B. Rubin}, 901 | owner = {laurence}, 902 | timestamp = {2014.10.26} 903 | } 904 | 905 | @ARTICLE{ImbensWooldridge.2009, 906 | author = {Guido W. Imbens and Jeffrey M. Wooldridge}, 907 | title = {Recent developments in the econometrics of program evaluation}, 908 | journal = {Journal of Economic Literature}, 909 | year = {2009}, 910 | volume = {47}, 911 | pages = {5-86}, 912 | owner = {Laurence}, 913 | timestamp = {2010.01.15} 914 | } 915 | 916 | @ARTICLE{Keele.2009, 917 | author = {Luke Keele}, 918 | title = {An observational study of ballot initiatives and state outcomes}, 919 | journal = {Working paper}, 920 | year = {2009}, 921 | owner = {Laurence}, 922 | timestamp = {2011.02.20} 923 | } 924 | 925 | @BOOK{Kennedy.2003, 926 | title = {A guide to econometrics}, 927 | publisher = {MIT Press}, 928 | year = {2003}, 929 | author = {Kennedy, Peter E.}, 930 | address = {Cambridge}, 931 | edition = {5th} 932 | } 933 | 934 | @ARTICLE{KitamuraStutzer.1997, 935 | author = {Yuichi Kitamura and Michael Stutzer}, 936 | title = {An information-theoretic alternative to generalized method of moments 937 | estimation}, 938 | journal = {Econometrica}, 939 | year = {1997}, 940 | volume = {65}, 941 | pages = {861-874}, 942 | owner = {Laurence}, 943 | timestamp = {2010.01.01} 944 | } 945 | 946 | @BOOK{Koenker.2005, 947 | title = {Quantile {Regression}}, 948 | publisher = {Cambridge University Press}, 949 | year = {2005}, 950 | author = {Roger Koenker}, 951 | owner = {Laurence}, 952 | timestamp = {2010.01.01} 953 | } 954 | 955 | @ARTICLE{KoenkerBassett.1978, 956 | author = {Roger Koenker and Gilbert Bassett}, 957 | title = {Regression quantiles}, 958 | journal = {Econometrica}, 959 | year = {1978}, 960 | volume = {46}, 961 | pages = {33-50}, 962 | owner = {Laurence}, 963 | timestamp = {2010.01.01} 964 | } 965 | 966 | @ARTICLE{LaLonde.1986, 967 | author = {Robert J. 
{LaLonde}}, 968 | title = {Evaluating the Econometric Evaluations of Training Programs with 969 | Experimental Data}, 970 | journal = {American Economic Review}, 971 | year = {1986}, 972 | volume = {76}, 973 | pages = {604-620}, 974 | owner = {laurencium}, 975 | timestamp = {2013.11.14} 976 | } 977 | 978 | @BOOK{Lancaster.1990, 979 | title = {The econometrics analysis of transition data}, 980 | publisher = {Cambridge University Press}, 981 | year = {1990}, 982 | author = {Tony Lancaster}, 983 | owner = {Laurence}, 984 | timestamp = {2010.02.18} 985 | } 986 | 987 | @ARTICLE{Lancaster.1979, 988 | author = {Tony Lancaster}, 989 | title = {Econometric methods for the duration of unemployment}, 990 | journal = {Econometrica}, 991 | year = {1979}, 992 | volume = {47}, 993 | pages = {939-956}, 994 | owner = {Laurence}, 995 | timestamp = {2010.02.22} 996 | } 997 | 998 | @ARTICLE{Lechner.2002, 999 | author = {Michael Lechner}, 1000 | title = {Program {Heterogeneity} and {Propensity} {Score} {Matching}: {An} 1001 | {Application} to the {Evaluation} of {Active} {Labor} {Market} {Policies}}, 1002 | journal = {Review of Economics and Statistics}, 1003 | year = {2002}, 1004 | volume = {84}, 1005 | pages = {205-220}, 1006 | owner = {laurencium}, 1007 | timestamp = {2013.10.18} 1008 | } 1009 | 1010 | @ARTICLE{LedoitWolf.2004, 1011 | author = {Olivier Ledoit and Michael Wolf}, 1012 | title = {A well-conditioned estimator for large-dimensional covariance matrices}, 1013 | journal = {Journal of Multivariate Analysis}, 1014 | year = {2004}, 1015 | volume = {88}, 1016 | pages = {365-411}, 1017 | owner = {Laurence}, 1018 | timestamp = {2011.03.16} 1019 | } 1020 | 1021 | @BOOK{LehmannRomano.2005, 1022 | title = {Testing statistical hypothesis}, 1023 | publisher = {Springer}, 1024 | year = {2005}, 1025 | author = {E. L. Lehmann and Joseph P. Romano}, 1026 | address = {New York}, 1027 | edition = {3rd}, 1028 | owner = {alumni}, 1029 | timestamp = {2010.04.30} 1030 | } 1031 | 1032 | @ARTICLE{Lovell.1963, 1033 | author = {Lovell, Michael C.}, 1034 | title = {Seasonal adjustment of economic time series}, 1035 | journal = {Journal of the American Statistical Association}, 1036 | year = {1963}, 1037 | volume = {58}, 1038 | pages = {993--1010} 1039 | } 1040 | 1041 | @ARTICLE{MacKinnonWhite.1985, 1042 | author = {MacKinnon, James Gordon and White, Halbert}, 1043 | title = {Some heteroskedasticity consistent covariance matrix estimators with 1044 | improved finite sample properties}, 1045 | journal = {Journal of Econometrics}, 1046 | year = {1985}, 1047 | volume = {29}, 1048 | pages = {305-325} 1049 | } 1050 | 1051 | @ARTICLE{Makarov.1981, 1052 | author = {Makarov, G. D.}, 1053 | title = {Estimates for the {Distribution} {Function} of a {Sum} of {Two} {Random} 1054 | {Variables} when the {Marginal} {Distributions} are {Fixed}}, 1055 | journal = {Theory of Probability and its Applications}, 1056 | year = {1981}, 1057 | volume = {26}, 1058 | pages = {803-806}, 1059 | owner = {laurencium}, 1060 | timestamp = {2013.11.14} 1061 | } 1062 | 1063 | @BOOK{Manski.2007, 1064 | title = {{Identification for Prediction and Decision}}, 1065 | publisher = {Harvard University Press}, 1066 | year = {2007}, 1067 | author = {Charles F. Manski}, 1068 | address = {MA}, 1069 | owner = {laurencium}, 1070 | timestamp = {2013.11.15} 1071 | } 1072 | 1073 | @BOOK{Manski.2003, 1074 | title = {Partial {Identification} of {Probability} {Distributions}}, 1075 | publisher = {Springer-Verlag}, 1076 | year = {2003}, 1077 | author = {Charles F. 
Manski}, 1078 | address = {New York}, 1079 | owner = {laurencium}, 1080 | timestamp = {2013.11.15} 1081 | } 1082 | 1083 | @ARTICLE{Manski.1985, 1084 | author = {Charles F. Manski}, 1085 | title = {Semiparametric analysis of discrete response: asymptotic properties 1086 | of the maximum score estimator}, 1087 | journal = {Journal of Econometrics}, 1088 | year = {1985}, 1089 | volume = {27}, 1090 | pages = {313-333}, 1091 | owner = {Laurence}, 1092 | timestamp = {2010.01.01} 1093 | } 1094 | 1095 | @ARTICLE{Manski.1975, 1096 | author = {Charles F. Manski}, 1097 | title = {Maximum score estimation of the stochastic utility model of choice}, 1098 | journal = {Journal of Econometrics}, 1099 | year = {1975}, 1100 | volume = {3}, 1101 | pages = {205-228}, 1102 | owner = {Laurence}, 1103 | timestamp = {2010.01.01} 1104 | } 1105 | 1106 | @BOOK{MardiaKentBibby.1979, 1107 | title = {Multivariate Analysis}, 1108 | publisher = {Academic Press}, 1109 | year = {1979}, 1110 | author = {Mardia, K. T. and Kent, J. T. and Bibby, J. M.}, 1111 | address = {London}, 1112 | owner = {alumni}, 1113 | timestamp = {2010.04.30} 1114 | } 1115 | 1116 | @ARTICLE{McFadden.1989, 1117 | author = {Daniel McFadden}, 1118 | title = {A method of simulated moments for estimation of discrete response 1119 | models without numerical integration}, 1120 | journal = {Econometrica}, 1121 | year = {1989}, 1122 | volume = {57}, 1123 | pages = {995-1026}, 1124 | owner = {Laurence}, 1125 | timestamp = {2010.01.01} 1126 | } 1127 | 1128 | @ARTICLE{McKeeRivkinSims.2010, 1129 | author = {Graham J. McKee and Steven G. Rivkin and Katherine R. E. Sims}, 1130 | title = {Disruption, {Achievement} and the {Heterogeneous} {Benefits} of {Smaller} 1131 | {Classes}}, 1132 | journal = {NBER Working Paper No. 15812}, 1133 | year = {2010}, 1134 | owner = {laurencium}, 1135 | timestamp = {2013.10.18} 1136 | } 1137 | 1138 | @ARTICLE{Meyer.1990, 1139 | author = {Bruce D. Meyer}, 1140 | title = {Unemployment insurance and unemployment spells}, 1141 | journal = {Econometrica}, 1142 | year = {1990}, 1143 | volume = {58}, 1144 | pages = {757-782}, 1145 | owner = {Laurence}, 1146 | timestamp = {2010.02.20} 1147 | } 1148 | 1149 | @ARTICLE{Newey.1994, 1150 | author = {Whitney K. Newey}, 1151 | title = {The asymptotic variance of semiparametric estimators}, 1152 | journal = {Econometrica}, 1153 | year = {1994}, 1154 | volume = {62}, 1155 | pages = {1349-1382}, 1156 | owner = {Laurence}, 1157 | timestamp = {2010.01.01} 1158 | } 1159 | 1160 | @ARTICLE{Newey.1991, 1161 | author = {Whitney K. Newey}, 1162 | title = {Uniform convergence in probability and stochastic equicontinuity}, 1163 | journal = {Econometrica}, 1164 | year = {1991}, 1165 | volume = {59}, 1166 | pages = {1161-1167}, 1167 | owner = {Laurence}, 1168 | timestamp = {2010.09.13} 1169 | } 1170 | 1171 | @ARTICLE{Newey.1990, 1172 | author = {Whitney K. Newey}, 1173 | title = {Semiparametric efficiency bounds}, 1174 | journal = {Journal of Applied Econometrics}, 1175 | year = {1990}, 1176 | volume = {5}, 1177 | pages = {99-135}, 1178 | owner = {Laurence}, 1179 | timestamp = {2010.01.01} 1180 | } 1181 | 1182 | @INCOLLECTION{NeweyMcFadden.1994, 1183 | author = {Whitney K. Newey and Daniel McFadden}, 1184 | title = {Large sample estimation and hypothesis testing}, 1185 | booktitle = {Handbook of Econometrics}, 1186 | publisher = {North-Holland}, 1187 | year = {1994}, 1188 | editor = {Robert F. 
Engle and Daniel McFadden}, 1189 | volume = {4}, 1190 | pages = {2111-2245}, 1191 | owner = {Laurence}, 1192 | timestamp = {2010.01.01} 1193 | } 1194 | 1195 | @ARTICLE{NeweyWest.1987, 1196 | author = {Newey, Whitney K. and West, Kenneth D.}, 1197 | title = {A simple, positive semi-definite, heteroskedasticity and autocorrelation 1198 | consistent covariance matrix}, 1199 | journal = {Econometrica}, 1200 | year = {1987}, 1201 | volume = {55}, 1202 | pages = {703--708} 1203 | } 1204 | 1205 | @ARTICLE{NgPerron.1995, 1206 | author = {Serena Ng and Pierre Perron}, 1207 | title = {Unit root tests in ARMA models with data-dependent methods for the 1208 | selection of the truncation lag}, 1209 | journal = {Journal of American Statistical Association}, 1210 | year = {1995}, 1211 | volume = {90}, 1212 | pages = {268-281}, 1213 | owner = {Laurence}, 1214 | timestamp = {2011.06.09} 1215 | } 1216 | 1217 | @ARTICLE{OlleyPakes.1996, 1218 | author = {G. Steven Olley and Ariel Pakes}, 1219 | title = {The dynamics of productivity in the telecommunications equipment 1220 | industry}, 1221 | journal = {Econometrica}, 1222 | year = {1996}, 1223 | volume = {64}, 1224 | pages = {1263-1297}, 1225 | owner = {Laurence}, 1226 | timestamp = {2010.01.01} 1227 | } 1228 | 1229 | @BOOK{PaganUllah.1999, 1230 | title = {Nonparametric econometrics}, 1231 | publisher = {Cambridge University Press}, 1232 | year = {1999}, 1233 | author = {Adrian Pagan and Aman Ullah}, 1234 | owner = {Laurence}, 1235 | timestamp = {2010.01.01} 1236 | } 1237 | 1238 | @ARTICLE{Pakes.1986, 1239 | author = {Ariel Pakes}, 1240 | title = {Patents as options: some estimates of the value of holding European 1241 | patent stocks}, 1242 | journal = {Econometrica}, 1243 | year = {1986}, 1244 | volume = {54}, 1245 | pages = {755-784}, 1246 | owner = {Laurence}, 1247 | timestamp = {2010.01.01} 1248 | } 1249 | 1250 | @ARTICLE{Park.2012, 1251 | author = {Byoung G. Park}, 1252 | title = {Nonparametric {Identification} and {Estimation} of the {Extended} 1253 | {Roy} {Model}}, 1254 | journal = {Job Market Paper}, 1255 | year = {2012}, 1256 | owner = {laurencium}, 1257 | timestamp = {2013.11.14} 1258 | } 1259 | 1260 | @ARTICLE{Pesaran.2006, 1261 | author = {M. Hashem Pesaran}, 1262 | title = {Estimation and inference in large heterogeneous panels with a multifactor 1263 | error structure}, 1264 | journal = {Econometrica}, 1265 | year = {2006}, 1266 | volume = {74}, 1267 | pages = {967-1012}, 1268 | owner = {Laurence}, 1269 | timestamp = {2010.01.07} 1270 | } 1271 | 1272 | @ARTICLE{PhillipsPerron.1988, 1273 | author = {Peter C. B. Phillips and Pierre Perron}, 1274 | title = {Testing for a unit root in time series regression}, 1275 | journal = {Biometrika}, 1276 | year = {1988}, 1277 | volume = {75}, 1278 | pages = {335-346}, 1279 | owner = {Laurence}, 1280 | timestamp = {2011.06.09} 1281 | } 1282 | 1283 | @ARTICLE{DimitrisRomano.1994, 1284 | author = {Dimitris N. Politis and Joseph P. Romano}, 1285 | title = {Large sample confidence regions based on subsamples under minimal 1286 | assumptions}, 1287 | journal = {The Annals of Statistics}, 1288 | year = {1994}, 1289 | volume = {22}, 1290 | pages = {2031-2050}, 1291 | owner = {alumni}, 1292 | timestamp = {2010.05.13} 1293 | } 1294 | 1295 | @BOOK{PolitisRomanoWolf.1999, 1296 | title = {Subsampling}, 1297 | publisher = {Springer}, 1298 | year = {1999}, 1299 | author = {Dimitris N. Politis and Joseph P. 
Romano and Michael Wolf}, 1300 | address = {New York}, 1301 | owner = {laurencium}, 1302 | timestamp = {2013.10.18} 1303 | } 1304 | 1305 | @BOOK{Pollard.1984, 1306 | title = {Convergence of stochastic processes}, 1307 | publisher = {Springer-Verlag}, 1308 | year = {1984}, 1309 | author = {David Pollard}, 1310 | address = {New York}, 1311 | owner = {alumni}, 1312 | timestamp = {2010.04.30} 1313 | } 1314 | 1315 | @ARTICLE{Portnoy.1985, 1316 | author = {Stephen Portnoy}, 1317 | title = {Asymptotic behavior of $M$-estimators of $p$ regression parameters 1318 | when $p^2/n$ is large. II. Normal approximatoin}, 1319 | journal = {Annals of Statistics}, 1320 | year = {1985}, 1321 | volume = {13}, 1322 | pages = {1403-1417}, 1323 | owner = {Laurence}, 1324 | timestamp = {2011.03.16} 1325 | } 1326 | 1327 | @ARTICLE{Portnoy.1984, 1328 | author = {Stephen Portnoy}, 1329 | title = {Asymptotic behavior of $M$-estimators of $p$ regression parameters 1330 | when $p^2/n$ is large. I. Consistency}, 1331 | journal = {Annals of Statistics}, 1332 | year = {1984}, 1333 | volume = {12}, 1334 | pages = {1298-1309}, 1335 | owner = {Laurence}, 1336 | timestamp = {2011.03.16} 1337 | } 1338 | 1339 | @ARTICLE{Powell.1984, 1340 | author = {James L. Powell}, 1341 | title = {Least absolute deviations estimation for the censored regression 1342 | model}, 1343 | journal = {Journal of Econometrics}, 1344 | year = {1984}, 1345 | volume = {25}, 1346 | pages = {303-325}, 1347 | owner = {Laurence}, 1348 | timestamp = {2010.01.01} 1349 | } 1350 | 1351 | @ARTICLE{PrenticeGloeckler.1978, 1352 | author = {R. L. Prentice and L. A. Gloeckler}, 1353 | title = {Regression analysis of grouped survival data with application to 1354 | breast cancer data}, 1355 | journal = {Biometrics}, 1356 | year = {1978}, 1357 | volume = {34}, 1358 | pages = {57-67}, 1359 | owner = {Laurence}, 1360 | timestamp = {2010.02.22} 1361 | } 1362 | 1363 | @ARTICLE{QinLawless.1994, 1364 | author = {Jing Qin and Jerry Lawless}, 1365 | title = {Empirical likelihood and general estimating equations}, 1366 | journal = {The Annals of Statistics}, 1367 | year = {1994}, 1368 | volume = {22}, 1369 | pages = {300-325}, 1370 | owner = {Laurence}, 1371 | timestamp = {2010.01.01} 1372 | } 1373 | 1374 | @ARTICLE{Robinson.1988, 1375 | author = {Robinson, P. M.}, 1376 | title = {Root-N-consistent semiparametric regression}, 1377 | journal = {Econometrica}, 1378 | year = {1988}, 1379 | volume = {56}, 1380 | pages = {931-954}, 1381 | owner = {Laurence}, 1382 | timestamp = {2010.09.04} 1383 | } 1384 | 1385 | @ARTICLE{Robinson.1987, 1386 | author = {Peter M. Robinson}, 1387 | title = {Asymptotically efficient estimation in the presence of heteroskedasticity 1388 | of unknown form}, 1389 | journal = {Econometrica}, 1390 | year = {1987}, 1391 | volume = {55}, 1392 | pages = {875-891}, 1393 | owner = {Laurence}, 1394 | timestamp = {2010.01.01} 1395 | } 1396 | 1397 | @ARTICLE{RomanoShaikh.2008, 1398 | author = {Joseph P. Romano and Azeem M. Shaikh}, 1399 | title = {Inference for {Identifiable} {Parameters} in {Partially} {Identified} 1400 | {Econometric} {Models}}, 1401 | journal = {Journal of Statistical Planning and Inference}, 1402 | year = {2008}, 1403 | volume = {138}, 1404 | pages = {2786-2807}, 1405 | owner = {laurence}, 1406 | timestamp = {2012.11.01} 1407 | } 1408 | 1409 | @ARTICLE{Rosenbaum.1984, 1410 | author = {Paul R. 
Rosenbaum}, 1411 | title = {The consequences of adjustment for a concomitant variable that has 1412 | been affected by the treatment}, 1413 | journal = {Journal of the Royal Statistical Society, Series A}, 1414 | year = {1984}, 1415 | volume = {147}, 1416 | pages = {656-666}, 1417 | owner = {Laurence}, 1418 | timestamp = {2010.08.07} 1419 | } 1420 | 1421 | @ARTICLE{RosenbaumRubin.1983, 1422 | author = {Paul R. Rosenbaum and Donald B. Rubin}, 1423 | title = {The central role of the propensity score in observational studies 1424 | for causal effects}, 1425 | journal = {Biometrika}, 1426 | year = {1983}, 1427 | volume = {70}, 1428 | pages = {41-55}, 1429 | owner = {Laurence}, 1430 | timestamp = {2010.01.01} 1431 | } 1432 | 1433 | @ARTICLE{Rubin.1976, 1434 | author = {Donald B. Rubin}, 1435 | title = {Inference and missing data}, 1436 | journal = {Biometrika}, 1437 | year = {1976}, 1438 | volume = {63}, 1439 | pages = {581-592}, 1440 | owner = {Laurence}, 1441 | timestamp = {2010.02.08} 1442 | } 1443 | 1444 | @ARTICLE{Rubin.1974, 1445 | author = {Donald B. Rubin}, 1446 | title = {Estimating {Causal} {Effects} of {Treatments} in {Randomized} and 1447 | {Nonrandomized} {Studies}}, 1448 | journal = {Journal of Educational Psychology}, 1449 | year = {1974}, 1450 | volume = {66}, 1451 | pages = {688-701}, 1452 | owner = {Laurence}, 1453 | timestamp = {2010.03.03} 1454 | } 1455 | 1456 | @ARTICLE{Rust.1987, 1457 | author = {John Rust}, 1458 | title = {Optimal replacement of GMC bus engines: an empirical model of Harold 1459 | Zurcher}, 1460 | journal = {Econometrica}, 1461 | year = {1987}, 1462 | volume = {55}, 1463 | pages = {999-1033}, 1464 | owner = {Laurence}, 1465 | timestamp = {2010.01.01} 1466 | } 1467 | 1468 | @BOOK{Ruud.2000, 1469 | title = {An introduction to classical econometric theory}, 1470 | publisher = {Oxford University Press}, 1471 | year = {2000}, 1472 | author = {Paul A. Ruud}, 1473 | address = {Oxford}, 1474 | owner = {Laurence}, 1475 | timestamp = {2010.01.01} 1476 | } 1477 | 1478 | @ARTICLE{SaidDickey.1984, 1479 | author = {Said E. Said and David A. Dickey}, 1480 | title = {Testing for unit roots in autoregressive-moving average models of 1481 | unknown order}, 1482 | journal = {Biometrika}, 1483 | year = {1984}, 1484 | volume = {71}, 1485 | pages = {599-607}, 1486 | owner = {Laurence}, 1487 | timestamp = {2011.06.09} 1488 | } 1489 | 1490 | @ARTICLE{SansoNavarro.2011, 1491 | author = {Marcos Sanso-Navarro}, 1492 | title = {The effects on American foreign direct investment in the United Kingdom 1493 | from not adopting the Euro}, 1494 | journal = {Journal of Common Market Studies}, 1495 | year = {2011}, 1496 | volume = {49}, 1497 | pages = {463-483}, 1498 | owner = {Laurence}, 1499 | timestamp = {2011.02.21} 1500 | } 1501 | 1502 | @BOOK{Shiryaev.1995, 1503 | title = {Probability}, 1504 | publisher = {Springer}, 1505 | year = {1995}, 1506 | author = {A. N. Shiryaev}, 1507 | address = {New York}, 1508 | edition = {2nd}, 1509 | owner = {alumni}, 1510 | timestamp = {2010.04.30} 1511 | } 1512 | 1513 | @ARTICLE{Sommer.1986, 1514 | author = {Alfred Sommer and Edi Djunaedi and A. A. Loeden and Ignatius Tarwotjo 1515 | and Keith P. 
West and Robert Tilden and Lisa Mele}, 1516 | title = {Impact of {Vitamin} {A} {Supplementation} on {Childhood} {Mortality}: 1517 | {A} {Randomised} {Controlled} {Community} {Trial}}, 1518 | journal = {The Lancet}, 1519 | year = {1986}, 1520 | volume = {327}, 1521 | pages = {1169-1173}, 1522 | owner = {laurencium}, 1523 | timestamp = {2013.11.19} 1524 | } 1525 | 1526 | @ARTICLE{StaigerStock.1997, 1527 | author = {Douglas Staiger and James H. Stock}, 1528 | title = {Instrumental variables regression with weak instruments}, 1529 | journal = {Econometrica}, 1530 | year = {1997}, 1531 | volume = {65}, 1532 | pages = {557-586}, 1533 | owner = {Laurence}, 1534 | timestamp = {2010.01.01} 1535 | } 1536 | 1537 | @ARTICLE{StockWatson.2008, 1538 | author = {James H. Stock and Mark W. Watson}, 1539 | title = {Heteroskedasticity-robust standard errors for fixed effects panel 1540 | data regression}, 1541 | journal = {Econometrica}, 1542 | year = {2008}, 1543 | volume = {76}, 1544 | pages = {155174}, 1545 | owner = {Laurence}, 1546 | timestamp = {2010.01.07} 1547 | } 1548 | 1549 | @ARTICLE{StockWright.2000, 1550 | author = {James H. Stock and Jonathan H. Wright}, 1551 | title = {GMM with weak identification}, 1552 | journal = {Econometrica}, 1553 | year = {2000}, 1554 | volume = {68}, 1555 | pages = {1055-1096}, 1556 | owner = {Laurence}, 1557 | timestamp = {2010.01.01} 1558 | } 1559 | 1560 | @ARTICLE{Stute.1986a, 1561 | author = {Winfried Stute}, 1562 | title = {Conditional empirical processes}, 1563 | journal = {Annals of Statistics}, 1564 | year = {1986}, 1565 | volume = {14}, 1566 | pages = {638-647}, 1567 | owner = {Laurence}, 1568 | timestamp = {2011.05.07} 1569 | } 1570 | 1571 | @ARTICLE{Stute.1986b, 1572 | author = {Winfried Stute}, 1573 | title = {On almost sure convergence of conditional empirical distribution 1574 | functions}, 1575 | journal = {Annals of Probability}, 1576 | year = {1986}, 1577 | volume = {14}, 1578 | pages = {891-901}, 1579 | owner = {Laurence}, 1580 | timestamp = {2011.05.07} 1581 | } 1582 | 1583 | @ARTICLE{Tibshirani.1996, 1584 | author = {Robert Tibshirani}, 1585 | title = {Regression shrinkage and selection via the lasso}, 1586 | journal = {Journal of the Royal Statistical Society, Series B}, 1587 | year = {1996}, 1588 | volume = {58}, 1589 | pages = {267-288}, 1590 | owner = {Laurence}, 1591 | timestamp = {2011.03.16} 1592 | } 1593 | 1594 | @BOOK{Train.2009, 1595 | title = {Discrete choice methods with simulation}, 1596 | publisher = {Cambridge University Press}, 1597 | year = {2009}, 1598 | author = {Kenneth E. Train}, 1599 | edition = {2nd}, 1600 | owner = {Laurence}, 1601 | timestamp = {2010.01.04} 1602 | } 1603 | 1604 | @ARTICLE{Vytlacil.2002, 1605 | author = {Edward Vytlacil}, 1606 | title = {Independence, monotonicity, and latent index models: an equivalence 1607 | result}, 1608 | journal = {Econometrica}, 1609 | year = {2002}, 1610 | volume = {70}, 1611 | pages = {331-341}, 1612 | owner = {Laurence}, 1613 | timestamp = {2010.07.12} 1614 | } 1615 | 1616 | @ARTICLE{White.1980, 1617 | author = {White, Halbert}, 1618 | title = {A heteroskedasticity-consistent covariance matrix estimator and a 1619 | direct test for heteroskedasticity}, 1620 | journal = {Econometrica}, 1621 | year = {1980}, 1622 | volume = {48}, 1623 | pages = {817--838} 1624 | } 1625 | 1626 | @ARTICLE{WilliamsonDowns.1990, 1627 | author = {Robert C. 
Williamson and Tom Downs}, 1628 | title = {Probabilitistic {Arithmetic} {I}: {Numerical} {Methods} for {Calculating} 1629 | {Convolutions} and {Dependency} {Bounds}}, 1630 | journal = {International Journal of Approximate Reasoning}, 1631 | year = {1990}, 1632 | volume = {4}, 1633 | pages = {89-158}, 1634 | owner = {laurence}, 1635 | timestamp = {2012.10.29} 1636 | } 1637 | 1638 | @ARTICLE{Wooldridge.2007, 1639 | author = {Jeffrey M. Wooldridge}, 1640 | title = {Inverse probability weighted estimation for general missing data 1641 | problem}, 1642 | journal = {Journal of Econometrics}, 1643 | year = {2007}, 1644 | volume = {141}, 1645 | pages = {1281-1301}, 1646 | owner = {Laurence}, 1647 | timestamp = {2010.02.08} 1648 | } 1649 | 1650 | @ARTICLE{Wooldridge.2005, 1651 | author = {Jeffrey M. Wooldridge}, 1652 | title = {Fixed-effects and related estimators for correlated random-coefficient 1653 | and treatment-effect panel data models}, 1654 | journal = {Review of Economics and Statistics}, 1655 | year = {2005}, 1656 | volume = {87}, 1657 | pages = {385-390}, 1658 | owner = {Laurence}, 1659 | timestamp = {2010.08.05} 1660 | } 1661 | 1662 | @ARTICLE{Wooldridge.2003, 1663 | author = {Jeffrey M. Wooldridge}, 1664 | title = {Cluster-sample methods in applied econometrics}, 1665 | journal = {American Economic Review}, 1666 | year = {2003}, 1667 | volume = {93}, 1668 | pages = {133-138}, 1669 | owner = {Laurence}, 1670 | timestamp = {2010.01.21} 1671 | } 1672 | 1673 | @BOOK{Wooldridge.2002, 1674 | title = {Econometric analysis of cross section and panel data}, 1675 | publisher = {MIT Press}, 1676 | year = {2002}, 1677 | author = {Wooldridge, Jeffrey M.}, 1678 | address = {Cambridge} 1679 | } 1680 | 1681 | @ARTICLE{Wooldridge.2001, 1682 | author = {Jeffrey M. Wooldridge}, 1683 | title = {Asymptotic properties of weighted m-estimators for standard stratified 1684 | samples}, 1685 | journal = {Econometric Theory}, 1686 | year = {2001}, 1687 | volume = {17}, 1688 | pages = {451-470}, 1689 | owner = {Laurence}, 1690 | timestamp = {2010.01.21} 1691 | } 1692 | 1693 | @ARTICLE{Wooldridge.1999, 1694 | author = {Jeffrey M. 
Wooldridge}, 1695 | title = {Asymptotic properties of weighted M-estimators for variable probability 1696 | samples}, 1697 | journal = {Econometrica}, 1698 | year = {1999}, 1699 | volume = {67}, 1700 | pages = {1385-1406}, 1701 | owner = {Laurence}, 1702 | timestamp = {2010.01.21} 1703 | } 1704 | 1705 | @ARTICLE{YuanLin.2007, 1706 | author = {Ming Yuan and Yi Lin}, 1707 | title = {Model selection and estimation in the Gaussian graphical model}, 1708 | journal = {Biometrika}, 1709 | year = {2007}, 1710 | volume = {94}, 1711 | pages = {19-35}, 1712 | owner = {Laurence}, 1713 | timestamp = {2011.03.16} 1714 | } 1715 | 1716 | -------------------------------------------------------------------------------- /docs/tex/vignette.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laurencium/Causalinference/630e8fb195754a720da41791b725d3dadabfb257/docs/tex/vignette.pdf -------------------------------------------------------------------------------- /docs/tex/vignette.tex: -------------------------------------------------------------------------------- 1 | 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 5 | 6 | 7 | \documentclass[12pt]{article} 8 | 9 | 10 | \usepackage{amsmath, amsthm, amssymb, setspace, fullpage, apacite, enumitem, listings} 11 | \usepackage[margin=0.7in]{geometry} 12 | \usepackage[english]{babel} 13 | 14 | \renewcommand{\qedsymbol}{$\scriptstyle \blacksquare$} 15 | \renewcommand{\vec}[1]{\mbox{\boldmath$#1$}} 16 | \newcommand{\dto}{\overset{d}{\to}} 17 | \newcommand{\pto}{\overset{p}{\to}} 18 | \newcommand{\E}{\mathrm{E}} 19 | \newcommand{\F}{\mathfrak{F}} 20 | \newcommand{\I}{\mathrm{I}} 21 | \newcommand{\M}{\mathfrak{M}} 22 | \newcommand{\N}{\mathrm{N}} 23 | \newcommand{\diag}{\mathrm{diag}} 24 | \renewcommand{\P}{\mathrm{P}} 25 | \newcommand{\Q}{\mathrm{Q}} 26 | \newcommand{\Cov}{\mathrm{Cov}} 27 | \newcommand{\Var}{\mathrm{Var}} 28 | \newcommand{\betav}{\vec{\beta}} 29 | \newcommand{\betahat}{\hat{\vec{\beta}}} 30 | \newcommand{\argmax}{\operatornamewithlimits{argmax}} 31 | \newcommand{\argmin}{\operatornamewithlimits{argmin}} 32 | \newcommand{\plim}{\operatornamewithlimits{plim}} 33 | \newcommand{\interior}{\operatornamewithlimits{int}} 34 | 35 | \newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}} 36 | \def\independenT#1#2{\mathrel{\setbox0\hbox{$#1#2$}% 37 | \copy0\kern-\wd0\mkern4mu\box0}} % statistical independence symbol 38 | 39 | \newtheorem{thm}{Theorem}[section] 40 | \newtheorem{corollary}[thm]{Corollary} 41 | \newtheorem{lemma}[thm]{Lemma} 42 | \newtheorem{axiom}[thm]{Axiom} 43 | 44 | \theoremstyle{definition} 45 | \newtheorem{defn}[thm]{Definition} 46 | 47 | \theoremstyle{definition} 48 | \newtheorem{example}[thm]{Example} 49 | 50 | \theoremstyle{definition} 51 | \newtheorem{assumption}[thm]{Assumption} 52 | 53 | \theoremstyle{remark} 54 | \newtheorem{remark}[thm]{Remark} 55 | 56 | 57 | \onehalfspace 58 | %\setlength{\parskip}{1ex} 59 | 60 | 61 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 62 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | 64 | \begin{document} 65 | 66 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 67 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 68 | 69 | \title{Causal Inference in Python: A Vignette} 70 | 
\author{Laurence Wong} 71 | \maketitle 72 | 73 | This document illustrates the use of \textit{Causalinference} with a simple simulated data set. We begin with some basic definitions. 74 | 75 | \section{Setting and Notation} 76 | 77 | As is standard in the literature, we work within the framework of Rubin's potential outcome model \cite{Rubin.1974}. 78 | 79 | Let $Y(0)$ denote the potential outcome of a subject in the absence of treatment, and let $Y(1)$ denote the unit's potential outcome when it is treated. Let $D$ denote treatment status, with $D=1$ indicating treatment and $D=0$ indicating control, and let $X$ be a $K$-column vector of covariates or individual characteristics. 80 | 81 | For unit $i$, $i=1,2,\ldots,N$, the observed outcome can be written as 82 | \[Y_i = (1-D_i) Y_i(0) + D_i Y_i(1).\] 83 | The set of observables $(Y_i, D_i, X_i)$, $i=1,2,\ldots,N$, forms the basic input data set for \textit{Causalinference}. 84 | 85 | \textit{Causalinference} is appropriate for settings in which treatment can be said to be \textit{strongly ignorable}, as defined in Rosenbaum and Rubin \citeyear{RosenbaumRubin.1983}. That is, for all $x$ in the support of $X$, we have 86 | \begin{itemize} 87 | \item[(i)] Unconfoundedness: $D$ is independent of $\big(Y(0), Y(1)\big)$ conditional on $X=x$; 88 | \item[(ii)] Overlap: $c < \P(D=1|X=x) < 1-c$, for some $c>0$. 89 | \end{itemize} 90 | 91 | In the following, we illustrate the typical flow of a causal analysis using the tools of \textit{Causalinference} and a simulated data set. In simulating the data, we specified a constant treatment effect of 10 for simplicity, and incorporated systematic overlap issues and nonlinearities to highlight a number of tools in the package. We focus mostly on illustrating the use of \textit{Causalinference}; for details on methodology please refer to Imbens and Rubin \citeyear{ImbensRubin.2015}. 92 | 93 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 94 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 95 | 96 | \section{Initialization} \label{sec.a} 97 | 98 | The main object of interest in \textit{Causalinference} is the class \texttt{CausalModel}, which we can import with 99 | \begin{verbatim} 100 | >>> from causalinference import CausalModel 101 | \end{verbatim} 102 | \texttt{CausalModel} takes as inputs three NumPy arrays: \texttt{Y}, an $N$-vector of observed outcomes; \texttt{D}, an $N$-vector of treatment status indicators; and \texttt{X}, an $N$-by-$K$ matrix of covariates. To initialize a \texttt{CausalModel} instance, simply run: 103 | \begin{verbatim} 104 | >>> causal = CausalModel(Y, D, X) 105 | \end{verbatim} 106 | 107 | Once an instance of the class \texttt{CausalModel} has been created, it will contain a number of attributes and methods that are relevant for conducting causal analyses. Tables \ref{tab.a} and \ref{tab.b} contain a brief description of these attributes and methods. 108 | 109 | \texttt{CausalModel} is \textit{stateful}. As we employ some of the methods to be discussed subsequently, the instance \texttt{causal} will mutate, with new data being added or existing data being modified or dropped. Running 110 | \begin{verbatim} 111 | >>> causal.reset() 112 | \end{verbatim} 113 | will return \texttt{causal} to its initial state. 
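To make the rest of this vignette easy to follow along with, the sketch below shows one way input arrays of this form might be simulated and passed to \texttt{CausalModel}. The data-generating process here is purely illustrative and is not the exact simulation used to produce the output reported below; it merely builds in a constant treatment effect of 10, a nonlinear outcome equation, and overlap problems of the kind discussed in the text.
\begin{verbatim}
>>> import numpy as np
>>> from causalinference import CausalModel
>>> np.random.seed(0)
>>> N, K = 1000, 2
>>> X = np.random.uniform(0, 10, size=(N, K))        # N-by-K covariate matrix
>>> p = 1/(1 + np.exp(2.5 - 0.5*X.sum(axis=1)))      # true propensity score
>>> D = (np.random.uniform(size=N) < p)*1            # treatment status indicators
>>> Y = 10*D + 0.5*(X**2).sum(axis=1) + np.random.normal(size=N)
>>> causal = CausalModel(Y, D, X)
\end{verbatim}
Any arrays of the appropriate shapes will work equally well; the only requirement is that \texttt{Y}, \texttt{D}, and \texttt{X} line up row by row.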
114 | 115 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 116 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 117 | 118 | \section{Summary Statistics} \label{sec.b} 119 | 120 | Once \texttt{CausalModel} has been instantiated, basic summary statistics will be computed and stored in the attribute \texttt{summary\_stats}. We can display it by running: 121 | \begin{verbatim} 122 | >>> print(causal.summary_stats) 123 | \end{verbatim} 124 | \begin{verbatim} 125 | Summary Statistics 126 | 127 | Controls (N_c=392) Treated (N_t=608) 128 | Variable Mean S.d. Mean S.d. Raw-diff 129 | -------------------------------------------------------------------------------- 130 | Y 43.097 31.353 90.911 41.815 47.814 131 | 132 | Controls (N_c=392) Treated (N_t=608) 133 | Variable Mean S.d. Mean S.d. Nor-diff 134 | -------------------------------------------------------------------------------- 135 | X0 3.810 2.950 5.762 2.566 0.706 136 | X1 3.436 2.848 5.849 2.634 0.880 137 | \end{verbatim} 138 | 139 | The attribute \texttt{summary\_stats} is in reality just a dictionary-like object with special method defined to enable the display of the above table. In many situations it is more convenient to simply access the relevant statistic directly. To retrieve the vector of covariate means for the treatment group, for example, we simply run: 140 | \begin{verbatim} 141 | >>> causal.summary_stats['X_t_mean'] 142 | array([ 5.76232357, 5.8489734 ]) 143 | \end{verbatim} 144 | 145 | Since \texttt{summary\_stats} behaves like a dictionary, it is equipped with the usual Python dictionary methods. To list the dictionary keys, for instance, we go: 146 | \begin{verbatim} 147 | >>> causal.summary_stats.keys() 148 | ['Y_c_mean', 'X_t_sd', 'N_t', 'K', 'ndiff', 'N', 'Y_t_sd', 'rdiff', 'Y_t_mean', 149 | 'X_c_mean', 'X_t_mean', 'Y_c_sd', 'X_c_sd', 'N_c'] 150 | \end{verbatim} 151 | 152 | Here \texttt{rdiff} refers to the difference in average observed outcomes between treatment and control groups. \texttt{ndiff}, on the other hand, refers to the normalized differences in average covariates, defined as 153 | \[\frac{\bar{X}_{k,t} - \bar{X}_{k,c}}{\sqrt{\left(s^2_{k,t}+s^2_{k,c}\right)\Big/ 2}},\] 154 | where $\bar{X}_{k,t}$ and $s_{k,t}$ are the sample mean and sample standard deviation of the $k$th covariate of the treatment group, and $\bar{X}_{k,c}$ and $s_{k,c}$ are the analogous statistics for the control group. 155 | 156 | The normalized differences in average covariates provide a way to measure the covariate balance between the treatment and the control groups. Unlike the t-statistic, its absolute magnitude does not increase (in expectation) as the sample size increases. 157 | 158 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 159 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 160 | 161 | \section{Least Squares Estimation} 162 | 163 | One of the simplest treatment effect estimators is the ordinary least squares (OLS) estimator. \textit{Causalinference} provides several common regression specifications. 
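Before turning to these, note that the normalized differences reported above can be recomputed by hand from \texttt{summary\_stats} using only the documented dictionary keys, which makes the definition concrete. A minimal sketch, assuming the \texttt{causal} instance created earlier:
\begin{verbatim}
>>> import numpy as np
>>> s = causal.summary_stats
>>> ndiff = (s['X_t_mean'] - s['X_c_mean']) / \
...         np.sqrt((s['X_t_sd']**2 + s['X_c_sd']**2)/2)
\end{verbatim}
The result should agree with \texttt{s['ndiff']} and with the Nor-diff column of the table above.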
164 | 165 | By default, the method \texttt{est\_via\_ols} will run the following regression: 166 | \[Y_i = \alpha + \beta D_i + \gamma' (X_i-\bar{X}) + \delta' D_i (X_i-\bar{X}) + \varepsilon_i.\] 167 | 168 | To inspect any treatment effect estimates produced, we can simply invoke \texttt{print} on the attribute \texttt{estimates}, as follows: 169 | \begin{verbatim} 170 | >>> causal.est_via_ols() 171 | >>> print(causal.estimates) 172 | \end{verbatim} 173 | \begin{verbatim} 174 | Treatment Effect Estimates: OLS 175 | 176 | Est. S.e. z P>|z| [95% Conf. int.] 177 | -------------------------------------------------------------------------------- 178 | ATE 3.672 0.906 4.051 0.000 1.895 5.449 179 | ATC -0.227 0.930 -0.244 0.807 -2.050 1.596 180 | ATT 6.186 1.067 5.799 0.000 4.095 8.277 181 | \end{verbatim} 182 | Here ATE, ATC, and ATT stand for, respectively, average treatment effect, average treatment effect for the controls, and average treatment effect for the treated. Like \texttt{summary\_stats}, the attribute \texttt{estimates} is a dictionary-like object that contains the estimation results. 183 | 184 | Including interaction terms between the treatment indicator $D$ and covariates $X$ implies that treatment effects can differ across individuals. In some instances we may want to assume a constant treatment effect, and only run 185 | \[Y_i = \alpha + \beta D_i + \gamma' (X_i-\bar{X}) + \varepsilon_i.\] 186 | This can be achieved by supplying the value 1 to the optional parameter \texttt{adj} of \texttt{est\_via\_ols} (its default value is 2). To compute the raw difference in average outcomes between treatment and control groups, we can set \texttt{adj=0}. 187 | 188 | In this example, the least squares estimates are radically different from the true treatment effect of 10. This is the result of the nonlinearity and non-overlap issues intentionally introduced into the data simulation process. As we shall see, several other tools exist in \textit{Causalinference} that can better deal with a lack of overlap and that will allow us to obtain estimates that are less sensitive to functional form assumptions. 189 | 190 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 191 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 192 | 193 | \section{Propensity Score Estimation} \label{sec.c} 194 | 195 | The probability of receiving treatment conditional on the covariates, $p(X_i) = \P(D_i=1|X_i)$, also known as the propensity score, plays a central role in much of what follows. Two methods, \texttt{est\_propensity} and \texttt{est\_propensity\_s}, are provided for propensity score estimation. Both involve running a logistic regression of the treatment indicator $D$ on functions of the covariates. \texttt{est\_propensity} allows the user to specify the covariates to include linearly and/or quadratically, while \texttt{est\_propensity\_s} will make this choice automatically based on a sequence of likelihood ratio tests. 196 | 197 | In the following, we run \texttt{est\_propensity\_s} and display the estimation results. In this example, the specification selection algorithm decided to include both covariates and all the interaction and quadratic terms. 198 | 199 | \begin{verbatim} 200 | >>> causal.est_propensity_s() 201 | >>> print(causal.propensity) 202 | \end{verbatim} 203 | \begin{verbatim} 204 | Estimated Parameters of Propensity Score 205 | 206 | Coef. S.e. z P>|z| [95% Conf. int.]
207 | -------------------------------------------------------------------------------- 208 | Intercept -2.839 0.526 -5.401 0.000 -3.870 -1.809 209 | X1 0.486 0.153 3.178 0.001 0.186 0.786 210 | X0 0.466 0.155 3.011 0.003 0.163 0.770 211 | X1*X0 0.080 0.015 5.391 0.000 0.051 0.109 212 | X0*X0 -0.045 0.012 -3.579 0.000 -0.069 -0.020 213 | X1*X1 -0.045 0.013 -3.542 0.000 -0.070 -0.020 214 | \end{verbatim} 215 | 216 | The \texttt{propensity} attribute is again another dictionary-like container of results. The dictionary keys of \texttt{propensity} can be found by running: 217 | \begin{verbatim} 218 | >>> causal.propensity.keys() 219 | ['coef', 'lin', 'qua', 'loglike', 'fitted', 'se'] 220 | \end{verbatim} 221 | The selected linear and quadratic terms are contained in the lists \texttt{causal.propensity['lin']} and \texttt{causal.propensity['qua']}. Though we won't make direct calls to it, most of the propensity-based techniques discussed subsequently are based on \texttt{causal.propensity['fitted']}, the vector of estimated propensity scores. 222 | 223 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 224 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 225 | 226 | \section{Improving Covariate Balance} \label{sec.d} 227 | 228 | When there is indication of covariate imbalance, we may wish to construct a sample where the treatment and control groups are more similar than the original full sample. One way of doing so is by dropping units with extreme values of propensity score. For these subjects, their covariate values are such that the probability of being in the treatment (or control) group is so overwhelmingly high that we cannot reliably find comparable units in the opposite group. We may wish to forego estimating treatment effects for such units since nothing much can be credibly said about them. 229 | 230 | A good rule-of-thumb is to drop units whose estimated propensity score is less than $\alpha=0.1$ or greater than $1-\alpha=0.9$. By default, once the propensity score has been estimated by running either \texttt{est\_propensity} or \texttt{est\_propensity\_s}, a value of 0.1 will be set for the attribute \texttt{cutoff}: 231 | 232 | \begin{verbatim} 233 | >>> causal.cutoff 234 | 0.1 235 | \end{verbatim} 236 | 237 | Calling \texttt{causal.trim()} at this point will drop every unit that has propensity score outside of the $[\alpha, 1-\alpha]$ interval. Alternatively, a procedure exists that will estimate the optimal cutoff that minimizes the asymptotic sampling variance of the trimmed sample. The method \texttt{trim\_s} will perform this calculation, set the \texttt{cutoff} to the optimal $\alpha$, and then invoke \texttt{trim} to construct the subsample. For our example, the optimal $\alpha$ was estimated to be slightly less than 0.1: 238 | \begin{verbatim} 239 | >>> causal.trim_s() 240 | >>> causal.cutoff 241 | 0.0954928016329 242 | \end{verbatim} 243 | The complexity of this cutoff selection algorithm is only $O(N \log N)$, so in practice there is very little reason to not employ it. 244 | 245 | If we now print \texttt{summary\_stats} again to view the summary statistics of the trimmed sample, we see that the normalized differences in average covariates has fallen noticeably. 246 | \begin{verbatim} 247 | >>> print(causal.summary_stats) 248 | \end{verbatim} 249 | \begin{verbatim} 250 | Summary Statistics 251 | 252 | Controls (N_c=371) Treated (N_t=363) 253 | Variable Mean S.d. Mean S.d. 
Raw-diff 254 | -------------------------------------------------------------------------------- 255 | Y 41.331 29.608 66.067 28.108 24.736 256 | 257 | Controls (N_c=371) Treated (N_t=363) 258 | Variable Mean S.d. Mean S.d. Nor-diff 259 | -------------------------------------------------------------------------------- 260 | X0 3.709 2.872 4.658 2.522 0.351 261 | X1 3.407 2.784 4.661 2.517 0.472 262 | \end{verbatim} 263 | 264 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 265 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 266 | 267 | \section{Stratifying the Sample} \label{sec.e} 268 | 269 | With the propensity score estimated, one may wish to stratify the sample into blocks that have units that are more similar in terms of their covariates. This makes the treatment and control groups within each propensity bin more comparable, and therefore treatment effect estimates more credible. 270 | 271 | \textit{Causalinference} provides two methods for subclassification based on propensity score. The first, \texttt{stratify}, splits the sample based on what is specified in the attribute \texttt{blocks}. The default value of \texttt{blocks} is set to 5, which means that \texttt{stratify} will split the sample into 5 equal-sized bins. In contrast, the second method, \texttt{stratify\_s}, will use a data-driven procedure for selecting both the number of blocks and their boundaries, with the expectation that the number of blocks should increase with the sample size. Operationally this method is a divide-and-conquer algorithm that recursively divides the sample into two until there is no significant advantage of doing so. This algorithm also runs in $O(N \log N)$ time, so costs relatively little to use. 272 | 273 | To inspect the results of the stratification, we can invoke \texttt{print} on the attribute \texttt{strata} to display some summary statistics, as follows: 274 | \begin{verbatim} 275 | >>> causal.stratify_s() 276 | >>> print(causal.strata) 277 | \end{verbatim} 278 | \begin{verbatim} 279 | Stratification Summary 280 | 281 | Propensity Score Sample Size Ave. Propensity Outcome 282 | Stratum Min. Max. Controls Treated Controls Treated Raw-diff 283 | -------------------------------------------------------------------------------- 284 | 1 0.095 0.265 157 28 0.188 0.187 11.885 285 | 2 0.266 0.474 111 72 0.360 0.367 12.025 286 | 3 0.477 0.728 70 113 0.598 0.601 11.696 287 | 4 0.728 0.836 23 69 0.781 0.787 10.510 288 | 5 0.838 0.904 10 81 0.865 0.873 3.405 289 | \end{verbatim} 290 | 291 | Under the hood, the attribute \texttt{strata} is actually a list-like object that contains, as each of its elements, a full instance of the class \texttt{CausalModel}, with the input data being those that correspond to the units that are in the propensity bin. We can thus, for example, access each stratum and inspect its \texttt{summary\_stats} attribute, or as the following illustrates, loop through \texttt{strata} and estimate within-bin treatment effects using least squares. 292 | \begin{verbatim} 293 | >>> for stratum in causal.strata: 294 | ... stratum.est_via_ols(adj=1) 295 | ... 
296 | >>> [stratum.estimates['ols']['ate'] for stratum in causal.strata] 297 | [10.379170390195197, 9.2918973715823707, 9.67876709257445, 9.6722830043583023, 298 | 9.2239596078238222] 299 | \end{verbatim} 300 | 301 | Note that these estimates are much more stable and closer to the true value of 10 than the within-bin raw differences in average outcomes that were reported in the stratification summary table, highlighting the virtue of further controlling for covariates even within blocks. 302 | 303 | Taking the sample-weighted average of the above within-bin least squares estimates results in a propensity score-based estimator that is commonly known as the subclassification estimator or blocking estimator. However, instead of manually looping through the \texttt{strata} attribute, estimating within-bin treatment effects, and then averaging appropriately to arrive at an overall estimate, we can also simply call \texttt{est\_via\_blocking}, which will perform these operations and collect the results in the attribute \texttt{estimates}. We will report these estimates in the next section along with estimates obtained from the alternative estimators. 304 | 305 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 306 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 307 | 308 | \section{Treatment Effect Estimation} \label{sec.f} 309 | 310 | In addition to least squares and the blocking estimator described in the last section, \textit{Causalinference} provides two alternative treatment effect estimators. The first is the nearest neighbor matching estimator of Abadie and Imbens \citeyear{AbadieImbens.2006}. Instead of relying on the propensity score, this estimator pairs treatment and control units by matching directly on the covariate vectors themselves. More specifically, each unit $i$ in the sample is matched with a unit $m(i)$ in the opposite group, where 311 | \[m(i) = \argmin_{j: D_j \neq D_i} \|X_j - X_i\|,\] 312 | and $\|X_j - X_i\|$ is some measure of distance between the covariate vectors $X_j$ and $X_i$. The method \texttt{est\_via\_matching} implements this estimator, as well as several extensions that can be invoked through optional arguments. 313 | 314 | The last estimator is a version of the Horvitz-Thompson weighting estimator, modified to further adjust for covariates. Mechanically, this involves running the following weighted least squares regression: 315 | \[Y_i = \alpha + \beta D_i + \gamma' X_i + \varepsilon_i,\] 316 | where the weight for unit $i$ is $1/\hat{p}(X_i)$ if $i$ is in the treatment group, and $1/\big(1-\hat{p}(X_i)\big)$ if $i$ is in the control group. This estimator is also sometimes called the doubly-robust estimator, referring to the fact that it is consistent if either the propensity score or the regression function is correctly specified. We can invoke it by calling \texttt{est\_via\_weighting}. Note that under this specification the treatment effect does not differ across units, so the ATC and the ATT are both equal to the overall ATE. 317 | 318 | In the following we invoke each of the four estimators (including least squares, since the input data has changed now that the sample has been trimmed), and print out the resulting estimates.
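Before doing so, it is worth sketching what the blocking point estimate amounts to. The snippet below aggregates the within-bin least squares estimates from the previous section by hand, as a sample-weighted average; it assumes that each stratum's sample size can be read off its internal \texttt{raw\_data['N']} entry, so treat it as an illustration rather than as part of the documented interface:
\begin{verbatim}
>>> ates = [stratum.estimates['ols']['ate'] for stratum in causal.strata]
>>> sizes = [stratum.raw_data['N'] for stratum in causal.strata]
>>> ate_by_hand = sum(N*ate for N, ate in zip(sizes, ates)) / sum(sizes)
\end{verbatim}
The value of \texttt{ate\_by\_hand} should agree closely with the blocking ATE reported below; \texttt{est\_via\_blocking} performs this aggregation (and the analogous ones for the ATC and ATT) while also combining the within-bin standard errors.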
319 | \begin{verbatim} 320 | >>> causal.est_via_ols() 321 | >>> causal.est_via_weighting() 322 | >>> causal.est_via_blocking() 323 | >>> causal.est_via_matching(bias_adj=True) 324 | >>> print(causal.estimates) 325 | \end{verbatim} 326 | \begin{verbatim} 327 | Treatment Effect Estimates: OLS 328 | 329 | Est. S.e. z P>|z| [95% Conf. int.] 330 | -------------------------------------------------------------------------------- 331 | ATE 2.913 0.803 3.627 0.000 1.339 4.487 332 | ATC 2.435 0.824 2.956 0.003 0.820 4.049 333 | ATT 3.401 0.885 3.843 0.000 1.667 5.136 334 | \end{verbatim} 335 | \begin{verbatim} 336 | Treatment Effect Estimates: Weighting 337 | 338 | Est. S.e. z P>|z| [95% Conf. int.] 339 | -------------------------------------------------------------------------------- 340 | ATE 17.821 1.684 10.585 0.000 14.521 21.121 341 | \end{verbatim} 342 | \begin{verbatim} 343 | Treatment Effect Estimates: Blocking 344 | 345 | Est. S.e. z P>|z| [95% Conf. int.] 346 | -------------------------------------------------------------------------------- 347 | ATE 9.702 0.381 25.444 0.000 8.954 10.449 348 | ATC 9.847 0.527 18.701 0.000 8.815 10.879 349 | ATT 9.553 0.332 28.771 0.000 8.903 10.204 350 | \end{verbatim} 351 | \begin{verbatim} 352 | Treatment Effect Estimates: Matching 353 | 354 | Est. S.e. z P>|z| [95% Conf. int.] 355 | -------------------------------------------------------------------------------- 356 | ATE 9.624 0.245 39.354 0.000 9.145 10.103 357 | ATC 9.642 0.270 35.776 0.000 9.114 10.170 358 | ATT 9.606 0.318 30.159 0.000 8.981 10.230 359 | \end{verbatim} 360 | 361 | As we can see above, despite the trimming, the least squares estimates are still severely biased, as is the weighting estimate (since neither the propensity score nor the regression function is correctly specified). The blocking and matching estimators, on the other hand, are less sensitive to specification assumptions, and thus result in estimates that are closer to the true average treatment effects. 362 | 363 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 364 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 365 | 366 | \bibliographystyle{apacite} 367 | \bibliography{references} 368 | 369 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 370 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 371 | 372 | \begin{table}[ht] 373 | \begin{center}\begin{tabular}{ll} 374 | Attribute & Description \\ 375 | \texttt{summary\_stats} & Dictionary-like object containing summary statistics for the \\ 376 | & outcome variable and the covariates. \\ 377 | \texttt{propensity} & Dictionary-like object containing propensity score data, \\ 378 | & including estimated logistic regression coefficients, predicted \\ 379 | & propensity score, maximized log-likelihood, and the lists of the \\ 380 | & linear and quadratic terms that are included in the regression. \\ 381 | \texttt{cutoff} & Floating point number specifying the cutoff point for trimming \\ 382 | & on propensity score.\\ 383 | \texttt{blocks} & Either an integer indicating the number of equal-sized blocks to \\ 384 | & stratify the sample into, or a list of ascending numbers specifying \\ 385 | & the boundaries of each stratum. \\ 386 | \texttt{strata} & List-like object containing the list of stratified propensity bins. \\ 387 | \texttt{estimates} & Dictionary-like object containing treatment effect estimates for \\ 388 | & each estimator used.
389 | \end{tabular}\end{center} 390 | \caption{Attributes of the class \texttt{CausalModel}. Invoking \texttt{print} on any of the dictionary- or list-like attributes above yields a customized summary table.} \label{tab.a} 391 | \end{table} 392 | 393 | \begin{table}[ht] 394 | \begin{center}\begin{tabular}{ll} 395 | Method & Description \\ 396 | \texttt{reset} & Reinitializes data to the original inputs and drops any estimated results. \\ 397 | \texttt{est\_propensity} & Estimates the propensity score via logistic regression using the specified \\ 398 | & linear and quadratic terms. \\ 399 | \texttt{est\_propensity\_s} & Estimates the propensity score via logistic regression using the \\ 400 | & covariate selection algorithm of Imbens and Rubin \citeyear{ImbensRubin.2015}. \\ 401 | \texttt{trim} & Trims data based on propensity score using the threshold specified \\ 402 | & by the attribute \texttt{cutoff}. \\ 403 | \texttt{trim\_s} & Trims data based on propensity score using the cutoff selected by \\ 404 | & the procedure of Crump, Hotz, Imbens, and Mitnik \citeyear{CrumpHotzImbensMitnik.2009}. \\ 405 | \texttt{stratify} & Stratifies the sample based on propensity score as specified by \\ 406 | & the attribute \texttt{blocks}. \\ 407 | \texttt{stratify\_s} & Stratifies the sample based on propensity score using the bin \\ 408 | & selection procedure suggested by Imbens and Rubin \citeyear{ImbensRubin.2015}. \\ 409 | \texttt{est\_via\_ols} & Estimates average treatment effects using least squares. \\ 410 | \texttt{est\_via\_weighting} & Estimates average treatment effects using the Horvitz-Thompson \\ 411 | & weighting estimator modified to incorporate covariates. \\ 412 | \texttt{est\_via\_blocking} & Estimates average treatment effects using regression within blocks. \\ 413 | \texttt{est\_via\_matching} & Estimates average treatment effects using matching with replacement. 414 | \end{tabular}\end{center} 415 | \caption{Methods of the class \texttt{CausalModel}.
Invoke \texttt{help} on any of the above methods for more detailed documentation.} \label{tab.b} 416 | \end{table} 417 | 418 | \clearpage 419 | 420 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 421 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 422 | 423 | \end{document} 424 | 425 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup 3 | except ImportError: 4 | from distutils.core import setup 5 | 6 | config = { 7 | 'name': 'CausalInference', 8 | 'version': '0.1.3', 9 | 'url': 'https://github.com/laurencium/causalinference', 10 | 'author': 'Laurence Wong', 11 | 'author_email': 'laurencium@gmail.com', 12 | 'packages': ['causalinference', 'causalinference.core', 13 | 'causalinference.estimators', 'causalinference.utils'], 14 | 'include_package_data': True, 15 | 'package_data': {'causalinference': ['utils/*.txt']}, 16 | 'license': 'LICENSE.txt', 17 | 'description': 'Causal Inference in Python', 18 | 'long_description': open('README.rst').read(), 19 | } 20 | 21 | setup(**config) 22 | 23 | -------------------------------------------------------------------------------- /tests/test_blocking.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.estimators.blocking as b 5 | import causalinference.causal as c 6 | 7 | 8 | def test_calc_atx(): 9 | 10 | atxs = [0.5, 3.2, -9.4] 11 | Ns = [5, 13, 7] 12 | ans = -0.868 13 | 14 | assert np.allclose(b.calc_atx(atxs, Ns), ans) 15 | 16 | 17 | def test_atx_se(): 18 | 19 | atx_ses = [0.3, 1.3, 0.8] 20 | Ns = [3, 8, 4] 21 | ans = 0.72788888 22 | 23 | assert np.allclose(b.calc_atx_se(atx_ses, Ns), ans) 24 | 25 | 26 | def test_blocking(): 27 | 28 | Y1 = np.array([52, 30, 5, 29, 12, 10, 44, 87]) 29 | D1 = np.array([0, 0, 0, 0, 1, 1, 1, 1]) 30 | X1 = np.array([[1, 42], [3, 32], [9, 7], [12, 86], 31 | [5, 94], [4, 36], [2, 13], [6, 61]]) 32 | causal1 = c.CausalModel(Y1, D1, X1) 33 | Y2 = np.array([16, 4, 10, 6, 9, 11]) 34 | D2 = np.array([0, 0, 0, 1, 1, 1]) 35 | X2 = np.array([[1], [3], [3], [1], [7], [2]]) 36 | causal2 = c.CausalModel(Y2, D2, X2) 37 | strata = [causal1, causal2] 38 | 39 | adj1 = 0 40 | blocking1 = b.Blocking(strata, adj1) 41 | ate1 = 4.714286 42 | atc1 = 4.714286 43 | att1 = 4.714286 44 | ate_se1 = 10.18945 45 | atc_se1 = 10.18945 46 | att_se1 = 10.18945 47 | assert np.allclose(blocking1['ate'], ate1) 48 | assert np.allclose(blocking1['atc'], atc1) 49 | assert np.allclose(blocking1['att'], att1) 50 | assert np.allclose(blocking1['ate_se'], ate_se1) 51 | assert np.allclose(blocking1['atc_se'], atc_se1) 52 | assert np.allclose(blocking1['att_se'], att_se1) 53 | 54 | adj2 = 1 55 | blocking2 = b.Blocking(strata, adj2) 56 | ate2 = 1.657703 57 | atc2 = 1.657703 58 | att2 = 1.657703 59 | ate_se2 = 10.22921 60 | atc_se2 = 10.22921 61 | att_se2 = 10.22921 62 | assert np.allclose(blocking2['ate'], ate2) 63 | assert np.allclose(blocking2['atc'], atc2) 64 | assert np.allclose(blocking2['att'], att2) 65 | assert np.allclose(blocking2['ate_se'], ate_se2) 66 | assert np.allclose(blocking2['atc_se'], atc_se2) 67 | assert np.allclose(blocking2['att_se'], att_se2) 68 | 69 | adj3 = 2 70 | blocking3 = b.Blocking(strata, adj3) 71 | ate3 = 17.83044057 72 | atc3 = 35.45842407 73 | att3 = 0.20250793 74 | ate_se3 = 11.42591 75 | atc_se3 = 
17.11964 76 | att_se3 = 6.87632 77 | assert np.allclose(blocking3['ate'], ate3) 78 | assert np.allclose(blocking3['atc'], atc3) 79 | assert np.allclose(blocking3['att'], att3) 80 | assert np.allclose(blocking3['ate_se'], ate_se3) 81 | assert np.allclose(blocking3['atc_se'], atc_se3) 82 | assert np.allclose(blocking3['att_se'], att_se3) 83 | 84 | -------------------------------------------------------------------------------- /tests/test_causal.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from nose.tools import * 3 | import numpy as np 4 | 5 | import causalinference.causal as c 6 | from utils import random_data 7 | 8 | 9 | def test_est_propensity(): 10 | 11 | D = np.array([0, 0, 0, 1, 1, 1]) 12 | X = np.array([[7, 8], [3, 10], [7, 10], [4, 7], [5, 10], [9, 8]]) 13 | Y = random_data(D_cur=D, X_cur=X) 14 | causal = c.CausalModel(Y, D, X) 15 | 16 | causal.est_propensity() 17 | lin = [0, 1] 18 | qua = [] 19 | coef = np.array([6.8066090, -0.0244874, -0.7524939]) 20 | loglike = -3.626517 21 | fitted = np.array([0.6491366, 0.3117840, 0.2911631, 22 | 0.8086407, 0.3013733, 0.6379023]) 23 | se = np.array([8.5373779, 0.4595191, 0.8106499]) 24 | keys = {'lin', 'qua', 'coef', 'loglike', 'fitted', 'se'} 25 | 26 | assert_equal(causal.propensity['lin'], lin) 27 | assert_equal(causal.propensity['qua'], qua) 28 | assert np.allclose(causal.propensity['coef'], coef) 29 | assert np.allclose(causal.propensity['loglike'], loglike) 30 | assert np.allclose(causal.propensity['fitted'], fitted) 31 | assert np.allclose(causal.propensity['se'], se) 32 | assert_equal(set(causal.propensity.keys()), keys) 33 | assert np.allclose(causal.raw_data['pscore'], fitted) 34 | 35 | 36 | def test_est_propensity_s(): 37 | 38 | D = np.array([0, 0, 0, 1, 1, 1]) 39 | X = np.array([[7, 8], [3, 10], [7, 10], [4, 7], [5, 10], [9, 8]]) 40 | Y = random_data(D_cur=D, X_cur=X) 41 | causal = c.CausalModel(Y, D, X) 42 | 43 | causal.est_propensity_s() 44 | lin1 = [1] 45 | qua1 = [] 46 | coef1 = np.array([6.5424027, -0.7392041]) 47 | loglike1 = -3.627939 48 | fitted1 = np.array([0.6522105, 0.2995088, 0.2995088, 49 | 0.7970526, 0.2995088, 0.6522105]) 50 | se1 = np.array([6.8455179, 0.7641445]) 51 | keys = {'lin', 'qua', 'coef', 'loglike', 'fitted', 'se'} 52 | 53 | assert_equal(causal.propensity['lin'], lin1) 54 | assert_equal(causal.propensity['qua'], qua1) 55 | assert np.allclose(causal.propensity['coef'], coef1) 56 | assert np.allclose(causal.propensity['loglike'], loglike1) 57 | assert np.allclose(causal.propensity['fitted'], fitted1) 58 | assert np.allclose(causal.propensity['se'], se1) 59 | assert_equal(set(causal.propensity.keys()), keys) 60 | assert np.allclose(causal.raw_data['pscore'], fitted1) 61 | 62 | causal.est_propensity_s([0,1]) 63 | lin2 = [0, 1] 64 | qua2 = [] 65 | coef2 = np.array([6.8066090, -0.0244874, -0.7524939]) 66 | loglike2 = -3.626517 67 | fitted2 = np.array([0.6491366, 0.3117840, 0.2911631, 68 | 0.8086407, 0.3013733, 0.6379023]) 69 | se2 = np.array([8.5373779, 0.4595191, 0.8106499]) 70 | 71 | assert_equal(causal.propensity['lin'], lin2) 72 | assert_equal(causal.propensity['qua'], qua2) 73 | assert np.allclose(causal.propensity['coef'], coef2) 74 | assert np.allclose(causal.propensity['loglike'], loglike2) 75 | assert np.allclose(causal.propensity['fitted'], fitted2) 76 | assert np.allclose(causal.propensity['se'], se2) 77 | assert np.allclose(causal.raw_data['pscore'], fitted2) 78 | 79 | 80 | def test_est_via_ols(): 81 | 82 | Y = 
np.array([52, 30, 5, 29, 12, 10, 44, 87]) 83 | D = np.array([0, 0, 0, 0, 1, 1, 1, 1]) 84 | X = np.array([[1, 42], [3, 32], [9, 7], [12, 86], 85 | [5, 94], [4, 36], [2, 13], [6, 61]]) 86 | causal = c.CausalModel(Y, D, X) 87 | 88 | adj1 = 0 89 | causal.est_via_ols(adj1) 90 | ate1 = 9.25 91 | ate_se1 = 17.68253 92 | keys1 = {'ate', 'ate_se'} 93 | assert np.allclose(causal.estimates['ols']['ate'], ate1) 94 | assert np.allclose(causal.estimates['ols']['ate_se'], ate_se1) 95 | assert_equal(set(causal.estimates['ols'].keys()), keys1) 96 | 97 | adj2 = 1 98 | causal.est_via_ols(adj2) 99 | ate2 = 3.654552 100 | ate_se2 = 17.749993 101 | keys2 = {'ate', 'ate_se'} 102 | assert np.allclose(causal.estimates['ols']['ate'], ate2) 103 | assert np.allclose(causal.estimates['ols']['ate_se'], ate_se2) 104 | assert_equal(set(causal.estimates['ols'].keys()), keys2) 105 | 106 | adj3 = 2 107 | causal.est_via_ols(adj3) 108 | ate3 = 30.59444 109 | atc3 = 63.2095 110 | att3 = -2.020611 111 | ate_se3 = 19.91887865 112 | atc_se3 = 29.92152 113 | att_se3 = 11.8586 114 | keys3 = {'ate', 'atc', 'att', 'ate_se', 'atc_se', 'att_se'} 115 | assert np.allclose(causal.estimates['ols']['ate'], ate3) 116 | assert np.allclose(causal.estimates['ols']['atc'], atc3) 117 | assert np.allclose(causal.estimates['ols']['att'], att3) 118 | assert np.allclose(causal.estimates['ols']['ate_se'], ate_se3) 119 | assert np.allclose(causal.estimates['ols']['atc_se'], atc_se3) 120 | assert np.allclose(causal.estimates['ols']['att_se'], att_se3) 121 | assert_equal(set(causal.estimates['ols'].keys()), keys3) 122 | 123 | 124 | def test_parse_lin_terms(): 125 | 126 | K1 = 4 127 | lin1 = None 128 | ans1 = [] 129 | assert_equal(c.parse_lin_terms(K1, lin1), ans1) 130 | 131 | K2 = 2 132 | lin2 = 'all' 133 | ans2 = [0, 1] 134 | assert_equal(c.parse_lin_terms(K2, lin2), ans2) 135 | 136 | K3 = 2 137 | lin3 = [1] 138 | ans3 = [1] 139 | assert_equal(c.parse_lin_terms(K3, lin3), ans3) 140 | 141 | K4 = 2 142 | lin4 = [] 143 | ans4 = [] 144 | assert_equal(c.parse_lin_terms(K4, lin4), ans4) 145 | 146 | 147 | def test_parse_qua_terms(): 148 | 149 | K1 = 3 150 | qua1 = None 151 | ans1 = [] 152 | assert_equal(c.parse_qua_terms(K1, qua1), ans1) 153 | 154 | K2 = 2 155 | qua2 = 'all' 156 | ans2 = [(0, 0), (0, 1), (1, 1)] 157 | assert_equal(c.parse_qua_terms(K2, qua2), ans2) 158 | 159 | K3 = 2 160 | qua3 = [(0, 1)] 161 | ans3 = [(0, 1)] 162 | assert_equal(c.parse_qua_terms(K3, qua3), ans3) 163 | 164 | K4 = 2 165 | qua4 = [] 166 | ans4 = [] 167 | assert_equal(c.parse_qua_terms(K4, qua4), ans4) 168 | 169 | 170 | def test_split_equal_bins(): 171 | 172 | pscore = np.array([0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 173 | 0.6, 0.7, 0.8, 0.9, 0.95]) 174 | blocks = 5 175 | ans = [0, 0.2, 0.4, 0.6, 0.8, 1] 176 | 177 | assert_equal(c.split_equal_bins(pscore, blocks), ans) 178 | 179 | 180 | def test_sumlessthan(): 181 | 182 | g1 = np.array([3, 1, 2, 4, 3, 3]) 183 | sg1 = np.array([1, 2, 3, 3, 3, 4]) 184 | cs11 = np.array([1, 2, 3, 4, 5, 6]) 185 | csg1 = np.array([1, 3, 6, 9, 12, 16]) 186 | 187 | ans1 = np.array([5, 1, 2, 6, 5, 5]) 188 | ans2 = np.array([12, 1, 3, 16, 12, 12]) 189 | assert np.array_equal(c.sumlessthan(g1, sg1, cs11), ans1) 190 | assert np.array_equal(c.sumlessthan(g1, sg1, csg1), ans2) 191 | 192 | g2 = np.array([22, 4, 6, 4, 25, 5]) 193 | sg2 = np.array([4, 4, 5, 6, 22, 25]) 194 | cs12 = np.array([1, 2, 3, 4, 5, 6]) 195 | csg2 = np.array([4, 8, 13, 19, 41, 66]) 196 | 197 | ans3 = np.array([5, 2, 4, 2, 6, 3]) 198 | ans4 = np.array([41, 8, 19, 8, 66, 13]) 199 | assert 
np.array_equal(c.sumlessthan(g2, sg2, cs12), ans3) 200 | assert np.array_equal(c.sumlessthan(g2, sg2, csg2), ans4) 201 | 202 | 203 | def test_select_cutoff(): 204 | 205 | g1 = np.array([3, 1, 2, 4, 3, 3]) 206 | ans1 = 0 207 | assert_equal(c.select_cutoff(g1), ans1) 208 | 209 | g2 = np.array([22, 4, 6, 4, 25, 5]) 210 | ans2 = 0.2113248654 211 | assert np.allclose(c.select_cutoff(g2), ans2) 212 | 213 | 214 | def test_calc_tstat(): 215 | 216 | sample1 = np.array([1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 217 | 3, 3, 3, 3, 3, 3, 4, 4, 4, 5]) 218 | sample2 = np.array([5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 219 | 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2]) 220 | ans = 3.632233 221 | 222 | assert np.allclose(c.calc_tstat(sample1, sample2), ans) 223 | 224 | 225 | def test_calc_sample_sizes(): 226 | 227 | D1 = np.array([0, 1, 0, 1, 0, 1]) 228 | ans1 = (2, 1, 1, 2) 229 | assert_equal(c.calc_sample_sizes(D1), ans1) 230 | 231 | D2 = np.array([0, 1, 0, 1, 0]) 232 | ans2 = (1, 1, 2, 1) 233 | assert_equal(c.calc_sample_sizes(D2), ans2) 234 | 235 | D3 = np.array([1, 1, 1, 1, 1, 1]) 236 | ans3 = (0, 3, 0, 3) 237 | assert_equal(c.calc_sample_sizes(D3), ans3) 238 | 239 | D4 = np.array([0, 0, 0]) 240 | ans4 = (1, 0, 2, 0) 241 | assert_equal(c.calc_sample_sizes(D4), ans4) 242 | 243 | 244 | def test_select_blocks(): 245 | 246 | pscore1 = np.array([0.05, 0.06, 0.3, 0.4, 0.5, 0.6, 0.7, 0.95, 0.95]) 247 | D1 = np.array([0, 0, 1, 1, 0, 0, 1, 1, 1]) 248 | logodds1 = np.log(pscore1 / (1-pscore1)) 249 | K1 = 1 250 | ans1 = np.array([0.05, 0.5, 0.5, 0.95]) 251 | test1 = np.array(c.select_blocks(pscore1, logodds1, D1, K1, 0, 1)) 252 | assert np.allclose(test1, ans1) 253 | 254 | pscore2 = np.array([0.05, 0.06, 0.3, 0.4, 0.5, 0.6, 0.7, 0.95, 0.95]) 255 | D2 = np.array([0, 0, 1, 1, 0, 0, 1, 1, 1]) 256 | logodds2 = np.log(pscore1 / (1-pscore1)) 257 | K2 = 2 258 | ans2 = np.array([0, 1]) 259 | test2 = np.array(c.select_blocks(pscore2, logodds2, D2, K2, 0, 1)) 260 | assert np.allclose(test2, ans2) 261 | 262 | -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.core.data as d 5 | 6 | 7 | def test_preprocess(): 8 | 9 | Y1 = np.array([[1.2], [3.45], [-6], [78.90]]) 10 | D1 = np.array([[0], [1], [0.0], [1]]) 11 | X1 = np.array([-1, 3, -5.6, 8.9]) 12 | Y_out, D_out, X_out = d.preprocess(Y1, D1, X1) 13 | 14 | ans1 = np.array([1.2, 3.45, -6, 78.9]) 15 | assert np.array_equal(Y_out, ans1) 16 | 17 | ans2 = np.array([0, 1.0, -0.0, 1]) 18 | assert np.array_equal(D_out, ans2) 19 | 20 | ans3 = np.array([[-1], [3], [-5.6], [8.9]]) 21 | assert np.array_equal(X_out, ans3) 22 | 23 | 24 | Y2 = np.array([3, 98]) 25 | D2 = np.array([[5], [21.9], [-53]]) 26 | X2 = np.array([1, 3.14]) 27 | assert_raises(IndexError, d.preprocess, Y2, D2, X2) 28 | 29 | 30 | def test_data(): 31 | 32 | Y1 = np.array([1.2, 3.45, -6, 78.90, -9, 8.7654]) 33 | D1 = np.array([0, 1, 0, 1.0, 0.0, 1]) 34 | X1 = np.array([[-1, 2], [3, -4], [-5.6, -7], [8.9, 0.0], [99, 877], [-666, 54321]]) 35 | data = d.Data(Y1, D1, X1) 36 | 37 | ans1 = np.array([1.2, 3.45, -6, 78.9, -9, 8.7654]) 38 | assert np.array_equal(data['Y'], ans1) 39 | 40 | ans2 = np.array([0, 1, 0, 1, 0, 1]) 41 | assert np.array_equal(data['D'], ans2) 42 | 43 | ans3 = np.array([[-1, 2], [3, -4], [-5.6, -7], [8.9, 0], [99, 877], [-666, 54321]]) 44 | assert np.array_equal(data['X'], ans3) 45 | 46 | ans4 = 6 47 | assert_equal(data['N'], 
ans4) 48 | 49 | ans5 = 2 50 | assert_equal(data['K'], ans5) 51 | 52 | ans6 = np.array([True, False, True, False, True, False]) 53 | assert np.array_equal(data['controls'], ans6) 54 | 55 | ans7 = np.array([False, True, False, True, False, True]) 56 | assert np.array_equal(data['treated'], ans7) 57 | 58 | ans8 = np.array([1.2, -6, -9]) 59 | assert np.array_equal(data['Y_c'], ans8) 60 | 61 | ans9 = np.array([3.45, 78.9, 8.7654]) 62 | assert np.array_equal(data['Y_t'], ans9) 63 | 64 | ans10 = np.array([[-1, 2], [-5.6, -7], [99, 877]]) 65 | assert np.array_equal(data['X_c'], ans10) 66 | 67 | ans11 = np.array([[3, -4], [8.9, 0], [-666, 54321]]) 68 | assert np.array_equal(data['X_t'], ans11) 69 | 70 | ans12 = 3 71 | assert_equal(data['N_t'], ans12) 72 | 73 | ans13 = 3 74 | assert_equal(data['N_c'], ans13) 75 | 76 | ans14 = 'int' 77 | assert_equal(data['D'].dtype, ans14) 78 | 79 | ans15 = {'Y', 'D', 'X', 'N', 'K', 'controls', 'treated', 80 | 'Y_c', 'Y_t', 'X_c', 'X_t', 'N_c', 'N_t'} 81 | assert_equal(set(data.keys()), ans15) 82 | 83 | Y2 = np.array([[1.2], [3.45], [-6], [78.90]]) 84 | D2 = np.array([[0], [1], [0.0], [1]]) 85 | X2 = np.array([[-1, 2], [3, -4], [-5.6, -7], [8.9, 0.0]]) 86 | assert_raises(ValueError, d.Data, Y2, D2, X2) 87 | 88 | -------------------------------------------------------------------------------- /tests/test_matching.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from nose.tools import * 3 | import numpy as np 4 | 5 | import causalinference.estimators.matching as m 6 | 7 | 8 | def test_norm(): 9 | 10 | X_i = np.array([1, 7, 3]) 11 | X_m = np.array([[4, 2, 5], [9, 8, 6]]) 12 | 13 | W1 = np.array([0.5, 1, 0.25]) 14 | ans1 = np.array([30.5, 35.25]) 15 | assert np.array_equal(m.norm(X_i, X_m, W1), ans1) 16 | 17 | W2 = np.array([[0.5, -0.1, 0.7], [-0.1, 1, 3], [0.7, 3, 0.25]]) 18 | ans2 = np.array([-18.1, 85.25]) 19 | assert np.array_equal(m.norm(X_i, X_m, W2), ans2) 20 | 21 | 22 | def test_smallestm(): 23 | 24 | d1 = np.array([1, 3, 2]) 25 | m1 = 1 26 | ans1 = np.array([0]) 27 | assert_equal(set(m.smallestm(d1, m1)), set(ans1)) 28 | 29 | d2 = np.array([1, 3, 2]) 30 | m2 = 2 31 | ans2 = np.array([0, 2]) 32 | assert_equal(set(m.smallestm(d2, m2)), set(ans2)) 33 | 34 | d3 = np.array([9, 2, 5, 9, 1, 2, 7]) 35 | m3 = 1 36 | ans3 = np.array([4]) 37 | assert_equal(set(m.smallestm(d3, m3)), set(ans3)) 38 | 39 | d4 = np.array([9, 2, 5, 9, 1, 2, 7]) 40 | m4 = 2 41 | ans4 = np.array([4, 1, 5]) 42 | assert_equal(set(m.smallestm(d4, m4)), set(ans4)) 43 | 44 | d5 = np.array([9, 2, 5, 9, 1, 2, 7]) 45 | m5 = 3 46 | ans5 = np.array([4, 1, 5]) 47 | assert_equal(set(m.smallestm(d5, m5)), set(ans5)) 48 | 49 | d6 = np.array([9, 2, 5, 9, 1, 2, 7]) 50 | m6 = 4 51 | ans6 = np.array([4, 1, 5, 2]) 52 | assert_equal(set(m.smallestm(d6, m6)), set(ans6)) 53 | 54 | d7 = np.array([-3.2, -3.2, 9.66, -3.2, 28.4]) 55 | m7 = 1 56 | ans7 = np.array([0, 1, 3]) 57 | assert_equal(set(m.smallestm(d7, m7)), set(ans7)) 58 | 59 | 60 | def test_match(): 61 | 62 | X_i = np.array([1, 7, 3]) 63 | X_m = np.array([[9, 8, 6], [4, 2, 5]]) 64 | 65 | W1 = np.array([0.5, 1, 0.25]) 66 | m1 = 1 67 | ans1 = np.array([1]) 68 | assert_equal(set(m.match(X_i, X_m, W1, m1)), set(ans1)) 69 | 70 | W2 = np.array([[0.5, -0.1, 0.7], [-0.1, 1, 3], [0.7, 3, 0.25]]) 71 | m2 = 1 72 | ans2 = np.array([1]) 73 | assert_equal(set(m.match(X_i, X_m, W2, m2)), set(ans2)) 74 | 75 | 76 | def test_bias_coefs(): 77 | 78 | Y_m = np.array([4, 2, 5, 2]) 79 | X_m = np.array([[7, 6], 
[5, 4], [2, 3], [3, 5]]) 80 | matches = [np.array([1, 0, 2]), np.array([1, 2]), 81 | np.array([2, 0]), np.array([0]), np.array([0, 1])] 82 | 83 | ans = np.array([-2, 3]) 84 | assert np.allclose(m.bias_coefs(matches, Y_m, X_m), ans) 85 | 86 | 87 | def test_bias(): 88 | 89 | X = np.array([[1, 2, 3], [-3, -2, -1]]) 90 | X_m = np.array([[4, 2, 6], [5, 7, 3], [9, 4, 1]]) 91 | matches = [np.array([0, 1, 2]), np.array([1])] 92 | coefs = np.array([-2, 0, 3]) 93 | 94 | ans = np.array([-9, -4]) 95 | assert np.allclose(m.bias(X, X_m, matches, coefs), ans) 96 | 97 | 98 | def test_scaled_counts(): 99 | 100 | N = 10 101 | matches = [np.array([3, 0, 1]), np.array([7]), np.array([1, 9])] 102 | 103 | ans = np.array([1/3, 1/3+1/2, 0, 1/3, 0, 0, 0, 1, 0, 1/2]) 104 | assert np.allclose(m.scaled_counts(N, matches), ans) 105 | 106 | 107 | def test_calc_atx_var(): 108 | 109 | vars_c = np.array([1, 2]) 110 | vars_t = np.array([0.5, 1, 0.25]) 111 | weights_c = np.array([1.5, 0.5]) 112 | weights_t = np.array([1, 1, 1]) 113 | 114 | out_var = m.calc_atx_var(vars_c, vars_t, weights_c, weights_t) 115 | ans = 0.8819444 116 | assert np.allclose(out_var, ans) 117 | 118 | 119 | def test_calc_atc_se(): 120 | 121 | vars_c = np.array([1, 2]) 122 | vars_t = np.array([0.5, 1, 0.25]) 123 | scaled_counts_t = np.array([1, 1, 0]) 124 | 125 | out_se = m.calc_atc_se(vars_c, vars_t, scaled_counts_t) 126 | ans = 1.0606602 127 | assert np.allclose(out_se, ans) 128 | 129 | 130 | def test_calc_att_se(): 131 | 132 | vars_c = np.array([1, 2]) 133 | vars_t = np.array([0.5, 1, 0.25]) 134 | scaled_counts_c = np.array([1, 2]) 135 | 136 | out_se = m.calc_att_se(vars_c, vars_t, scaled_counts_c) 137 | ans = 1.0929064 138 | assert np.allclose(out_se, ans) 139 | 140 | 141 | def test_calc_ate_se(): 142 | 143 | vars_c = np.array([1, 2]) 144 | vars_t = np.array([0.5, 1, 0.25]) 145 | scaled_counts_c = np.array([1, 2]) 146 | scaled_counts_t = np.array([1, 1, 0]) 147 | 148 | out_se = m.calc_ate_se(vars_c, vars_t, scaled_counts_c, scaled_counts_t) 149 | ans = 1.0630146 150 | assert np.allclose(out_se, ans) 151 | 152 | -------------------------------------------------------------------------------- /tests/test_ols.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.estimators.ols as o 5 | import causalinference.core.data as d 6 | 7 | 8 | def test_form_matrix(): 9 | 10 | D = np.array([0, 1, 0, 1]) 11 | X = np.array([[1], [2], [3], [4]]) 12 | 13 | adj1 = 0 14 | ans1 = np.array([[1, 0], [1, 1], [1, 0], [1, 1]]) 15 | assert np.array_equal(o.form_matrix(D, X, adj1), ans1) 16 | 17 | adj2 = 1 18 | ans2 = np.array([[1, 0, -1.5], [1, 1, -0.5], 19 | [1, 0, 0.5], [1, 1, 1.5]]) 20 | assert np.array_equal(o.form_matrix(D, X, adj2), ans2) 21 | 22 | adj3 = 2 23 | ans3 = np.array([[1, 0, -1.5, 0], [1, 1, -0.5, -0.5], 24 | [1, 0, 0.5, 0], [1, 1, 1.5, 1.5]]) 25 | assert np.array_equal(o.form_matrix(D, X, adj3), ans3) 26 | 27 | 28 | def test_calc_ate(): 29 | 30 | olscoef = np.array([1, 2, 3, 4]) 31 | ans = 2 32 | 33 | assert_equal(o.calc_ate(olscoef), ans) 34 | 35 | 36 | def test_calc_atx(): 37 | 38 | olscoef = np.array([1, 2, 3, 4, 5, 6]) 39 | meandiff = np.array([7, 8]) 40 | ans = 85 41 | 42 | assert_equal(o.calc_atx(olscoef, meandiff), ans) 43 | 44 | 45 | def test_calc_cov(): 46 | 47 | Z = np.array([[4, 4, 4, 2, 1, 3], [4, 2, 2, 6, 2, 2], 48 | [3, 4, 2, 1, 3, 1], [2, 3, 0, 0, 1, 2], 49 | [4, 3, 2, 1, 4, 2], [2, 5, 4, 2, 2, 0]]) 50 | u = np.array([1, 3, 6, 4, 
3, 1]) 51 | ans = np.array([[434.755102, 8.442177, -87.529252, 52 | -77.227211, -204.360544, -354.38095], 53 | [8.442177, 1.988662, -3.601814, 54 | -1.224943, -4.913832, -6.68254], 55 | [-87.529252, -3.601814, 19.817710, 56 | 15.136009, 41.933787, 71.05079], 57 | [-77.227211, -1.224943, 15.136009, 58 | 14.185125, 35.989569, 62.89841], 59 | [-204.360544, -4.913831, 41.933787, 60 | 35.989569, 97.145125, 166.58730], 61 | [-354.380952, -6.682540, 71.050794, 62 | 62.898413, 166.587302, 289.11111]]) 63 | 64 | assert np.allclose(o.calc_cov(Z, u), ans) 65 | 66 | 67 | def test_submatrix(): 68 | 69 | cov = np.array([[1, 2, 3, 4, 5, 6], [7, 9, 8, 9, 8, 7], 70 | [1, 2, 3, 4, 5, 6], [7, 8, 9, 1, 2, 3], 71 | [4, 6, 5, 6, 5, 4], [7, 3, 8, 9, 2, 1]]) 72 | ans = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]]) 73 | 74 | assert np.allclose(o.submatrix(cov), ans) 75 | 76 | 77 | def test_calc_ate_se(): 78 | 79 | subcov = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]]) 80 | ans = np.sqrt(5) 81 | 82 | assert_equal(o.calc_ate_se(subcov), ans) 83 | 84 | 85 | def test_calc_atx_se(): 86 | 87 | cov = np.array([[1, 2, 3, 4, 5, 6], [7, 9, 8, 9, 8, 7], 88 | [1, 2, 3, 4, 5, 6], [7, 8, 9, 1, 2, 3], 89 | [4, 6, 5, 6, 5, 4], [7, 3, 8, 9, 2, 1]]) 90 | meandiff = np.array([3, 7]) 91 | ans = 18.46619 92 | 93 | assert np.allclose(o.calc_atx_se(cov, meandiff), ans) 94 | 95 | 96 | def test_ols(): 97 | 98 | Y = np.array([52, 30, 5, 29, 12, 10, 44, 87]) 99 | D = np.array([0, 0, 0, 0, 1, 1, 1, 1]) 100 | X = np.array([[1, 42], [3, 32], [9, 7], [12, 86], 101 | [5, 94], [4, 36], [2, 13], [6, 61]]) 102 | data = d.Data(Y, D, X) 103 | 104 | adj1 = 0 105 | ols1 = o.OLS(data, adj1) 106 | ate1 = 9.25 107 | ate_se1 = 17.68253 108 | keys1 = {'ate', 'ate_se'} 109 | assert np.allclose(ols1['ate'], ate1) 110 | assert np.allclose(ols1['ate_se'], ate_se1) 111 | assert_equal(set(ols1.keys()), keys1) 112 | 113 | adj2 = 1 114 | ols2 = o.OLS(data, adj2) 115 | ate2 = 3.654552 116 | ate_se2 = 17.749993 117 | keys2 = {'ate', 'ate_se'} 118 | assert np.allclose(ols2['ate'], ate2) 119 | assert np.allclose(ols2['ate_se'], ate_se2) 120 | assert_equal(set(ols2.keys()), keys2) 121 | 122 | adj3 = 2 123 | ols3 = o.OLS(data, adj3) 124 | ate3 = 30.59444 125 | atc3 = 63.2095 126 | att3 = -2.020611 127 | ate_se3 = 19.91887865 128 | atc_se3 = 29.92152 129 | att_se3 = 11.8586 130 | keys3 = {'ate', 'atc', 'att', 'ate_se', 'atc_se', 'att_se'} 131 | assert np.allclose(ols3['ate'], ate3) 132 | assert np.allclose(ols3['atc'], atc3) 133 | assert np.allclose(ols3['att'], att3) 134 | assert np.allclose(ols3['ate_se'], ate_se3) 135 | assert np.allclose(ols3['atc_se'], atc_se3) 136 | assert np.allclose(ols3['att_se'], att_se3) 137 | assert_equal(set(ols3.keys()), keys3) 138 | 139 | -------------------------------------------------------------------------------- /tests/test_propensity.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.core.data as d 5 | import causalinference.core.propensity as p 6 | from utils import random_data 7 | 8 | 9 | def test_form_matrix(): 10 | 11 | X = np.array([[1, 3], [5, 7], [8, 6], [4, 2]]) 12 | 13 | ans0 = np.column_stack((np.ones(4), X)) 14 | assert np.array_equal(p.form_matrix(X, [0, 1], []), ans0) 15 | 16 | lin1 = [0] 17 | qua1 = [(0, 1), (1, 1)] 18 | ans1 = np.array([[1, 1, 3, 9], [1, 5, 35, 49], 19 | [1, 8, 48, 36], [1, 4, 8, 4]]) 20 | assert np.array_equal(p.form_matrix(X, lin1, qua1), ans1) 21 | 22 | lin2 = [0] 23 | qua2 = [(1, 0), (1, 
1)] 24 | ans2 = np.array([[1, 1, 3, 9], [1, 5, 35, 49], 25 | [1, 8, 48, 36], [1, 4, 8, 4]]) 26 | assert np.array_equal(p.form_matrix(X, lin2, qua2), ans2) 27 | 28 | lin3 = [0, 1] 29 | qua3 = [(0, 0)] 30 | ans3 = np.array([[1, 1, 3, 1], [1, 5, 7, 25], 31 | [1, 8, 6, 64], [1, 4, 2, 16]]) 32 | assert np.array_equal(p.form_matrix(X, lin3, qua3), ans3) 33 | 34 | 35 | def test_sigmoid(): 36 | 37 | x = np.array([0, 10000, -10000, 5]) 38 | ans = np.array([0.5, 1.0, 0.0, 1/(1+np.exp(-5))]) 39 | assert np.array_equal(p.sigmoid(x), ans) 40 | 41 | 42 | def test_log1exp(): 43 | 44 | x = np.array([0, 10000, -10000, 5]) 45 | ans = np.array([np.log(2), 0.0, 10000, np.log(1+np.exp(-5))]) 46 | assert np.array_equal(p.log1exp(x), ans) 47 | 48 | 49 | def test_neg_loglike(): 50 | 51 | beta = np.array([1, 2]) 52 | X_c = np.array([[100, 50], [-2, 1], [-500, -1300], [1, 0]]) 53 | X_t = np.array([[0, 0], [50, 25], [-50, -75], [0, -0.5]]) 54 | ans = 2 * (200 + np.log(2) + np.log(1+np.e)) 55 | assert_equal(p.neg_loglike(beta, X_c, X_t), ans) 56 | 57 | 58 | def test_neg_gradient(): 59 | 60 | beta = np.array([2, -1]) 61 | X_c = np.array([[1, 2], [125, 50]]) 62 | X_t = np.array([[50, 0], [2.5, 4]]) 63 | ans = np.array([125.5 - 2.5/(1+np.e), 51 - 4/(1+np.e)]) 64 | assert np.array_equal(p.neg_gradient(beta, X_c, X_t), ans) 65 | 66 | 67 | def test_calc_coef(): 68 | 69 | X_c = np.array([[1, 1, 8], [1, 8, 5]]) 70 | X_t = np.array([[1, 10, 2], [1, 5, 8]]) 71 | ans = np.array([-6.9441137, 0.6608454, 0.4900669]) 72 | 73 | assert np.allclose(p.calc_coef(X_c, X_t), ans) 74 | 75 | 76 | def test_calc_se(): 77 | 78 | Z = np.array([[1, 64, 188], [1, 132, 59], [1, 106, 72], [1, 86, 154]]) 79 | phat = np.array([0.5101151, 0.3062871, 0.8566664, 0.3269315]) 80 | ans = np.array([25.56301220, 0.16572624, 0.07956535]) 81 | 82 | assert np.allclose(p.calc_se(Z, phat), ans) 83 | 84 | 85 | def test_propensity(): 86 | 87 | D = np.array([0, 0, 0, 1, 1, 1]) 88 | X = np.array([[7, 8], [3, 10], [7, 10], [4, 7], [5, 10], [9, 8]]) 89 | Y = random_data(D_cur=D, X_cur=X) 90 | 91 | data = d.Data(Y, D, X) 92 | propensity = p.Propensity(data, [0, 1], []) 93 | lin = [0, 1] 94 | qua = [] 95 | coef = np.array([6.8066090, -0.0244874, -0.7524939]) 96 | loglike = -3.626517 97 | fitted = np.array([0.6491366, 0.3117840, 0.2911631, 98 | 0.8086407, 0.3013733, 0.6379023]) 99 | se = np.array([8.5373779, 0.4595191, 0.8106499]) 100 | keys = {'lin', 'qua', 'coef', 'loglike', 'fitted', 'se'} 101 | 102 | assert_equal(propensity['lin'], lin) 103 | assert_equal(propensity['qua'], qua) 104 | assert np.allclose(propensity['coef'], coef) 105 | assert np.allclose(propensity['loglike'], loglike) 106 | assert np.allclose(propensity['fitted'], fitted) 107 | assert np.allclose(propensity['se'], se) 108 | assert_equal(set(propensity.keys()), keys) 109 | 110 | -------------------------------------------------------------------------------- /tests/test_propensityselect.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.core.data as d 5 | import causalinference.core.propensity as p 6 | from utils import random_data 7 | 8 | 9 | def test_get_excluded_lin(): 10 | 11 | K1 = 4 12 | included1 = [] 13 | ans1 = [0, 1, 2, 3] 14 | assert_equal(p.get_excluded_lin(K1, included1), ans1) 15 | 16 | K2 = 4 17 | included2 = [3, 1] 18 | ans2 = [0, 2] 19 | assert_equal(p.get_excluded_lin(K2, included2), ans2) 20 | 21 | K3 = 3 22 | included3 = [0, 1, 2] 23 | ans3 = [] 24 | 
assert_equal(p.get_excluded_lin(K3, included3), ans3) 25 | 26 | 27 | def test_get_excluded_qua(): 28 | 29 | lin1 = [0, 2, 3] 30 | qua1 = [(0, 3), (3, 3)] 31 | ans1 = [(0, 0), (0, 2), (2, 2), (2, 3)] 32 | assert_equal(p.get_excluded_qua(lin1, qua1), ans1) 33 | 34 | lin2 = [1, 2] 35 | qua2 = [] 36 | ans2 = [(1, 1), (1, 2), (2, 2)] 37 | assert_equal(p.get_excluded_qua(lin2, qua2), ans2) 38 | 39 | lin3 = [8, 5] 40 | qua3 = [(8, 8), (8, 5), (5, 5)] 41 | ans3 = [] 42 | assert_equal(p.get_excluded_qua(lin3, qua3), ans3) 43 | 44 | 45 | def test_calc_loglike(): 46 | 47 | X_c = np.array([[1, 2], [3, 7]]) 48 | X_t = np.array([[1, 4], [3, 6]]) 49 | lin = [1] 50 | qua = [(0, 0)] 51 | ans = -2.567814 52 | assert np.allclose(p.calc_loglike(X_c, X_t, lin, qua), ans) 53 | 54 | 55 | def test_select_lin(): 56 | 57 | Y, D, X = random_data(K=4) 58 | X_c_random, X_t_random = X[D==0], X[D==1] 59 | 60 | lin1 = [0, 1, 2, 3] 61 | C1 = np.random.rand(1) 62 | ans1 = [0, 1, 2, 3] 63 | assert_equal(p.select_lin(X_c_random, X_t_random, lin1, C1), ans1) 64 | 65 | X_c = np.array([[1, 2], [9, 7]]) 66 | X_t = np.array([[1, 4], [9, 6]]) 67 | 68 | lin2 = [] 69 | C2 = 0.07 70 | ans2 = [] 71 | assert_equal(p.select_lin(X_c, X_t, lin2, C2), ans2) 72 | 73 | lin3 = [] 74 | C3 = 0.06 75 | ans3 = [1, 0] 76 | assert_equal(p.select_lin(X_c, X_t, lin3, C3), ans3) 77 | 78 | lin4 = [1] 79 | C4 = 0.35 80 | ans4 = [1] 81 | assert_equal(p.select_lin(X_c, X_t, lin4, C4), ans4) 82 | 83 | lin5 = [1] 84 | C5 = 0.34 85 | ans5 = [1, 0] 86 | assert_equal(p.select_lin(X_c, X_t, lin5, C5), ans5) 87 | 88 | 89 | def test_select_lin_terms(): 90 | 91 | Y, D, X = random_data(K=4) 92 | X_c_random, X_t_random = X[D==0], X[D==1] 93 | 94 | lin1 = [3, 0, 1] 95 | C1 = np.inf 96 | ans1 = [3, 0, 1] 97 | assert_equal(p.select_lin_terms(X_c_random, X_t_random, lin1, C1), ans1) 98 | 99 | lin2 = [2] 100 | C2 = 0 101 | ans2 = [2, 0, 1, 3] 102 | assert_equal(p.select_lin_terms(X_c_random, X_t_random, lin2, C2), ans2) 103 | 104 | lin3 = [] 105 | C3 = 0 106 | ans3 = [0, 1, 2, 3] 107 | assert_equal(p.select_lin_terms(X_c_random, X_t_random, lin3, C3), ans3) 108 | 109 | lin4 = [3, 1] 110 | C4 = -34.234 111 | ans4 = [3, 1, 0, 2] 112 | assert_equal(p.select_lin_terms(X_c_random, X_t_random, lin4, C4), ans4) 113 | 114 | X_c = np.array([[1, 2], [9, 7]]) 115 | X_t = np.array([[1, 4], [9, 7]]) 116 | 117 | lin5 = [] 118 | C5 = 0.06 119 | ans5 = [1, 0] 120 | assert_equal(p.select_lin_terms(X_c, X_t, lin5, C5), ans5) 121 | 122 | 123 | def test_select_qua(): 124 | 125 | Y, D, X = random_data() 126 | X_c_random, X_t_random = X[D==0], X[D==1] 127 | 128 | lin1 = [1, 0] 129 | qua1 = [(1, 0), (0, 0), (1, 1)] 130 | C1 = np.random.rand(1) 131 | ans1 = [(1, 0), (0, 0), (1, 1)] 132 | assert_equal(p.select_qua(X_c_random, X_t_random, lin1, qua1, C1), ans1) 133 | 134 | lin2 = [1] 135 | qua2 = [(1, 1)] 136 | C2 = np.random.rand(1) 137 | ans2 = [(1, 1)] 138 | assert_equal(p.select_qua(X_c_random, X_t_random, lin2, qua2, C2), ans2) 139 | 140 | X_c = np.array([[7, 8], [3, 10], [7, 10]]) 141 | X_t = np.array([[4, 7], [5, 10], [9, 8]]) 142 | 143 | lin3 = [0, 1] 144 | qua3 = [] 145 | C3 = 1.2 146 | ans3 = [] 147 | assert_equal(p.select_qua(X_c, X_t, lin3, qua3, C3), ans3) 148 | 149 | lin4 = [0, 1] 150 | qua4 = [] 151 | C4 = 1.1 152 | ans4 = [(1, 1), (0, 1), (0, 0)] 153 | assert_equal(p.select_qua(X_c, X_t, lin4, qua4, C4), ans4) 154 | 155 | lin5 = [0, 1] 156 | qua5 = [(1, 1)] 157 | C5 = 2.4 158 | ans5 = [(1, 1)] 159 | assert_equal(p.select_qua(X_c, X_t, lin5, qua5, C5), ans5) 160 | 161 | lin6 = [0, 
1] 162 | qua6 = [(1, 1)] 163 | C6 = 2.3 164 | ans6 = [(1, 1), (0, 1), (0, 0)] 165 | assert_equal(p.select_qua(X_c, X_t, lin6, qua6, C6), ans6) 166 | 167 | lin7 = [0, 1] 168 | qua7 = [(1, 1), (0, 1)] 169 | C7 = 3.9 170 | ans7 = [(1, 1), (0, 1)] 171 | assert_equal(p.select_qua(X_c, X_t, lin7, qua7, C7), ans7) 172 | 173 | lin8 = [0, 1] 174 | qua8 = [(1, 1), (0, 1)] 175 | C8 = 3.8 176 | ans8 = [(1, 1), (0, 1), (0, 0)] 177 | assert_equal(p.select_qua(X_c, X_t, lin8, qua8, C8), ans8) 178 | 179 | 180 | def test_select_qua_terms(): 181 | 182 | Y, D, X = random_data() 183 | X_c_random, X_t_random = X[D==0], X[D==1] 184 | 185 | lin1 = [0, 1] 186 | C1 = np.inf 187 | ans1 = [] 188 | assert_equal(p.select_qua_terms(X_c_random, X_t_random, lin1, C1), ans1) 189 | 190 | lin2 = [1, 0] 191 | C2 = 0 192 | ans2 = [(1, 1), (1, 0), (0, 0)] 193 | assert_equal(p.select_qua_terms(X_c_random, X_t_random, lin2, C2), ans2) 194 | 195 | lin3 = [0] 196 | C3 = -983.340 197 | ans3 = [(0, 0)] 198 | assert_equal(p.select_qua_terms(X_c_random, X_t_random, lin3, C3), ans3) 199 | 200 | lin4 = [] 201 | C4 = 34.234 202 | ans4 = [] 203 | assert_equal(p.select_qua_terms(X_c_random, X_t_random, lin4, C4), ans4) 204 | 205 | X_c = np.array([[7, 8], [3, 10], [7, 10]]) 206 | X_t = np.array([[4, 7], [5, 10], [9, 8]]) 207 | 208 | lin5 = [0, 1] 209 | C5 = 1.1 210 | ans5 = [(1, 1), (0, 1), (0, 0)] 211 | assert_equal(p.select_qua_terms(X_c, X_t, lin5, C5), ans5) 212 | 213 | 214 | def test_propensityselect(): 215 | 216 | D = np.array([0, 0, 0, 1, 1, 1]) 217 | X = np.array([[7, 8], [3, 10], [7, 10], [4, 7], [5, 10], [9, 8]]) 218 | Y = random_data(D_cur=D, X_cur=X) 219 | data = d.Data(Y, D, X) 220 | 221 | propensity1 = p.PropensitySelect(data, [], 1, 2.71) 222 | lin1 = [1] 223 | qua1 = [] 224 | coef1 = np.array([6.5424027, -0.7392041]) 225 | loglike1 = -3.627939 226 | fitted1 = np.array([0.6522105, 0.2995088, 0.2995088, 227 | 0.7970526, 0.2995088, 0.6522105]) 228 | se1 = np.array([6.8455179, 0.7641445]) 229 | keys = {'lin', 'qua', 'coef', 'loglike', 'fitted', 'se'} 230 | 231 | assert_equal(propensity1['lin'], lin1) 232 | assert_equal(propensity1['qua'], qua1) 233 | assert np.allclose(propensity1['coef'], coef1) 234 | assert np.allclose(propensity1['loglike'], loglike1) 235 | assert np.allclose(propensity1['fitted'], fitted1) 236 | assert np.allclose(propensity1['se'], se1) 237 | assert_equal(set(propensity1.keys()), keys) 238 | 239 | 240 | propensity2 = p.PropensitySelect(data, [0, 1], 1, 2.71) 241 | lin2 = [0, 1] 242 | qua2 = [] 243 | coef2 = np.array([6.8066090, -0.0244874, -0.7524939]) 244 | loglike2 = -3.626517 245 | fitted2 = np.array([0.6491366, 0.3117840, 0.2911631, 246 | 0.8086407, 0.3013733, 0.6379023]) 247 | se2 = np.array([8.5373779, 0.4595191, 0.8106499]) 248 | 249 | assert_equal(propensity2['lin'], lin2) 250 | assert_equal(propensity2['qua'], qua2) 251 | assert np.allclose(propensity2['coef'], coef2) 252 | assert np.allclose(propensity2['loglike'], loglike2) 253 | assert np.allclose(propensity2['fitted'], fitted2) 254 | assert np.allclose(propensity2['se'], se2) 255 | 256 | -------------------------------------------------------------------------------- /tests/test_summary.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.core.data as d 5 | import causalinference.core.summary as s 6 | 7 | 8 | def test_calc_ndiff(): 9 | 10 | ans = -1/np.sqrt(2.5) 11 | assert_equal(s.calc_ndiff(4, 3, 2, 1), ans) 12 | 13 | 14 | def 
test_summary(): 15 | 16 | Y = np.array([1, 2, 3, 4, 6, 5]) 17 | D = np.array([0, 0, 1, 1, 0, 1]) 18 | X = np.array([[1, 3], [5, 7], [8, 6], [4, 2], [9, 11], [12, 10]]) 19 | data = d.Data(Y, D, X) 20 | summary = s.Summary(data) 21 | 22 | N = 6 23 | K = 2 24 | N_c = 3 25 | N_t = 3 26 | Y_c_mean = 3 27 | Y_t_mean = 4 28 | Y_c_sd = np.sqrt(7) 29 | Y_t_sd = 1 30 | rdiff = 1 31 | X_c_mean = np.array([5, 7]) 32 | X_t_mean = np.array([8, 6]) 33 | X_c_sd = np.array([4, 4]) 34 | X_t_sd = np.array([4, 4]) 35 | ndiff = np.array([0.75, -0.25]) 36 | keys1 = {'N', 'K', 'N_c', 'N_t', 'Y_c_mean', 'Y_t_mean', 'Y_c_sd', 'Y_t_sd', 37 | 'X_c_mean', 'X_t_mean', 'X_c_sd', 'X_t_sd', 'rdiff', 'ndiff'} 38 | 39 | assert_equal(summary['N'], N) 40 | assert_equal(summary['N_c'], N_c) 41 | assert_equal(summary['N_t'], N_t) 42 | assert_equal(summary['Y_c_mean'], Y_c_mean) 43 | assert_equal(summary['Y_t_mean'], Y_t_mean) 44 | assert_equal(summary['Y_c_sd'], Y_c_sd) 45 | assert_equal(summary['Y_t_sd'], Y_t_sd) 46 | assert_equal(summary['rdiff'], rdiff) 47 | assert np.array_equal(summary['X_c_mean'], X_c_mean) 48 | assert np.array_equal(summary['X_t_mean'], X_t_mean) 49 | assert np.array_equal(summary['X_c_sd'], X_c_sd) 50 | assert np.array_equal(summary['X_t_sd'], X_t_sd) 51 | assert np.array_equal(summary['ndiff'], ndiff) 52 | assert_equal(set(summary.keys()), keys1) 53 | 54 | p_c = np.array([0.3, 0.5, 0.7]) 55 | p_t = np.array([0.1, 0.5, 0.9]) 56 | summary._summarize_pscore(p_c, p_t) 57 | keys2 = {'N', 'K', 'N_c', 'N_t', 'Y_c_mean', 'Y_t_mean', 'Y_c_sd', 'Y_t_sd', 58 | 'X_c_mean', 'X_t_mean', 'X_c_sd', 'X_t_sd', 'rdiff', 'ndiff', 59 | 'p_min', 'p_max', 'p_c_mean', 'p_t_mean'} 60 | 61 | assert_equal(summary['p_min'], 0.1) 62 | assert_equal(summary['p_max'], 0.9) 63 | assert_equal(summary['p_c_mean'], 0.5) 64 | assert_equal(summary['p_t_mean'], 0.5) 65 | assert_equal(set(summary.keys()), keys2) 66 | 67 | -------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.utils.tools as t 5 | 6 | 7 | def test_convert_to_formatting(): 8 | 9 | entry_types = ['string', 'float', 'integer', 'float'] 10 | ans = ['s', '.3f', '.0f', '.3f'] 11 | 12 | assert_equal(list(t.convert_to_formatting(entry_types)), ans) 13 | 14 | 15 | def test_add_row(): 16 | 17 | entries1 = ('Variable', 'Mean', 'S.d.', 'Mean', 'S.d.', 'Raw diff') 18 | entry_types1 = ['string']*6 19 | col_spans1 = [1]*6 20 | width1 = 80 21 | ans1 = ' Variable Mean S.d. Mean S.d. 
Raw diff\n' 22 | assert_equal(t.add_row(entries1, entry_types1, col_spans1, width1), ans1) 23 | 24 | entries2 = [12, 13.2, -3.14, 9.8765] 25 | entry_types2 = ['integer', 'integer', 'float', 'float'] 26 | col_spans2 = [1, 2, 2, 1] 27 | width2 = 80 28 | ans2 = ' 12 13 -3.140 9.877\n' 29 | assert_equal(t.add_row(entries2, entry_types2, col_spans2, width2), ans2) 30 | 31 | 32 | def test_add_line(): 33 | 34 | width = 30 35 | ans = '------------------------------\n' 36 | 37 | assert_equal(t.add_line(width), ans) 38 | 39 | 40 | def test_gen_reg_entries(): 41 | 42 | varname = 'Income' 43 | coef = 0.5 44 | se = 0.25 45 | ans1 = 'Income' 46 | ans2 = 0.5 47 | ans3 = 0.25 48 | ans4 = 2 49 | ans5 = 0.045500 50 | ans6 = 0.01 51 | ans7 = 0.99 52 | 53 | v, c, s, z, p, lw, up = t.gen_reg_entries(varname, coef, se) 54 | assert_equal(v, ans1) 55 | assert_equal(c, ans2) 56 | assert_equal(s, ans3) 57 | assert_equal(z, ans4) 58 | assert np.allclose(p, ans5) 59 | assert np.allclose(lw, ans6) 60 | assert np.allclose(up, ans7) 61 | 62 | -------------------------------------------------------------------------------- /tests/test_weighting.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.estimators.weighting as w 5 | import causalinference.core.data as d 6 | 7 | 8 | def test_calc_weights(): 9 | 10 | pscore = np.array([0.1, 0.25, 0.5, 0.75, 0.9]) 11 | D = np.array([0, 1, 0, 1, 0]) 12 | 13 | ans = np.array([1.11111, 4, 2, 1.33333, 10]) 14 | assert np.allclose(w.calc_weights(pscore, D), ans) 15 | 16 | 17 | def test_weigh_data(): 18 | 19 | Y = np.array([1, -2, 3, -5, 7]) 20 | D = np.array([0, 1, 0, 1, 0]) 21 | X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]) 22 | weights = np.array([1/0.9, 4, 2, 1/0.75, 10]) 23 | 24 | Y_ans = np.array([1.11111, -8, 6, -6.66667, 70]) 25 | Z_ans = np.array([[1.11111, 0, 1.11111, 2.22222], 26 | [4, 4, 12, 16], 27 | [2, 0, 10, 12], 28 | [1.33333, 1.33333, 9.33333, 10.66667], 29 | [10, 0, 90, 100]]) 30 | Y_out, Z_out = w.weigh_data(Y, D, X, weights) 31 | assert np.allclose(Y_out, Y_ans) 32 | assert np.allclose(Z_out, Z_ans) 33 | 34 | 35 | def test_weighting(): 36 | 37 | Y = np.array([1, -2, 3, -5, 7]) 38 | D = np.array([0, 1, 0, 1, 0]) 39 | X = np.array([3, 2, 3, 5, 5]) 40 | pscore = np.array([0.1, 0.25, 0.5, 0.75, 0.9]) 41 | data = d.Data(Y, D, X) 42 | data._dict['pscore'] = pscore 43 | 44 | weighting = w.Weighting(data) 45 | ate = -6.7963178 46 | ate_se = 2.8125913 47 | keys = {'ate', 'ate_se'} 48 | assert np.allclose(weighting['ate'], ate) 49 | assert np.allclose(weighting['ate_se'], ate_se) 50 | assert_equal(set(weighting.keys()), keys) 51 | 52 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def random_data(N=0, K=0, Y_cur=None, D_cur=None, X_cur=None): 5 | 6 | if X_cur is not None: 7 | N, K = X_cur.shape 8 | elif D_cur is not None: 9 | N = D_cur.shape[0] 10 | elif Y_cur is not None: 11 | N = Y_cur.shape[0] 12 | 13 | if N == 0 and K == 0: 14 | K = np.random.random_integers(1, 5) 15 | N = np.random.random_integers(4, 4*K) 16 | elif N != 0 and K == 0: 17 | K = np.random.random_integers(1, N-1) 18 | elif N == 0 and K != 0: 19 | N = np.random.random_integers(4, 4*K) 20 | 21 | data = [] 22 | if Y_cur is None: 23 | Y_data = np.random.rand(N) 24 | data.append(Y_data) 25 | if D_cur is None: 26 | D_data = 
np.random.random_integers(0, 1, N) 27 | # loop to ensure at least two subjects in each group 28 | while D_data.sum() <= 1 or D_data.sum() >= N-1: 29 | D_data = np.random.random_integers(0, 1, N) 30 | data.append(D_data) 31 | if X_cur is None: 32 | X_data = np.random.rand(N, K) 33 | data.append(X_data) 34 | 35 | if len(data) == 1: 36 | return data[0] 37 | else: 38 | return data 39 | 40 | --------------------------------------------------------------------------------