├── .gitignore ├── COPYRIGHTS.txt ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── causalinference ├── __init__.py ├── causal.py ├── core │ ├── __init__.py │ ├── data.py │ ├── propensity.py │ ├── strata.py │ └── summary.py ├── estimators │ ├── __init__.py │ ├── base.py │ ├── blocking.py │ ├── matching.py │ ├── ols.py │ └── weighting.py └── utils │ ├── __init__.py │ ├── lalonde_data.txt │ ├── tools.py │ └── vignette_data.txt ├── docs ├── Makefile ├── _templates │ └── layout.html ├── causalinference.core.rst ├── causalinference.estimators.rst ├── causalinference.rst ├── causalinference.utils.rst ├── conf.py ├── favicon.png ├── index.rst └── tex │ ├── references.bib │ ├── vignette.pdf │ └── vignette.tex ├── setup.py └── tests ├── test_blocking.py ├── test_causal.py ├── test_data.py ├── test_matching.py ├── test_ols.py ├── test_propensity.py ├── test_propensityselect.py ├── test_summary.py ├── test_tools.py ├── test_weighting.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | *.swp 4 | *.log 5 | *.aux 6 | *.bbl 7 | *.blg 8 | *.bak 9 | *.egg-info 10 | *.R 11 | dist 12 | interactive.py 13 | docs/_build 14 | -------------------------------------------------------------------------------- /COPYRIGHTS.txt: -------------------------------------------------------------------------------- 1 | The license of causalinference can be found in LICENSE.txt 2 | 3 | causalinference contains code or derivative code from several other 4 | packages. Collected below are the copyright statements of code from 5 | other packages. 6 | 7 | numpy 8 | --------------------------------------------------------------------------- 9 | Copyright (c) 2005-2009, NumPy Developers. 10 | All rights reserved. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are 14 | met: 15 | 16 | * Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above 20 | copyright notice, this list of conditions and the following 21 | disclaimer in the documentation and/or other materials provided 22 | with the distribution. 23 | 24 | * Neither the name of the NumPy Developers nor the names of any 25 | contributors may be used to endorse or promote products derived 26 | from this software without specific prior written permission. 27 | 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
39 | --------------------------------------------------------------------------- 40 | 41 | scipy 42 | --------------------------------------------------------------------------- 43 | Copyright (c) 2001, 2002 Enthought, Inc. 44 | All rights reserved. 45 | 46 | Copyright (c) 2003-2009 SciPy Developers. 47 | All rights reserved. 48 | 49 | Redistribution and use in source and binary forms, with or without 50 | modification, are permitted provided that the following conditions are met: 51 | 52 | a. Redistributions of source code must retain the above copyright notice, 53 | this list of conditions and the following disclaimer. 54 | b. Redistributions in binary form must reproduce the above copyright 55 | notice, this list of conditions and the following disclaimer in the 56 | documentation and/or other materials provided with the distribution. 57 | c. Neither the name of the Enthought nor the names of its contributors 58 | may be used to endorse or promote products derived from this software 59 | without specific prior written permission. 60 | 61 | 62 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 66 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 68 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 69 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 72 | DAMAGE. 73 | --------------------------------------------------------------------------- 74 | 75 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2015, Laurence Wong 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. Neither the name of the copyright holder nor the names of its 13 | contributors may be used to endorse or promote products derived from 14 | this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 27 | 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include *.py 2 | include *.txt 3 | include MANIFEST.in 4 | include README.rst 5 | include causalinference/utils/*.txt 6 | 7 | exclude interactive.py 8 | global-exclude *.swp *.pyc 9 | 10 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Causal Inference in Python 2 | ========================== 3 | 4 | *Causal Inference in Python*, or *Causalinference* in short, is a software package that implements various statistical and econometric methods used in the field variously known as Causal Inference, Program Evaluation, or Treatment Effect Analysis. 5 | 6 | Work on *Causalinference* started in 2014 by Laurence Wong as a personal side project. It is distributed under the 3-Clause BSD license. 7 | 8 | Important Links 9 | =============== 10 | 11 | The official website for *Causalinference* is 12 | 13 | https://causalinferenceinpython.org 14 | 15 | The most current development version is hosted on GitHub at 16 | 17 | https://github.com/laurencium/causalinference 18 | 19 | Package source and binary distribution files are available from PyPi at 20 | 21 | https://pypi.python.org/pypi/causalinference 22 | 23 | For an overview of the main features and uses of *Causalinference*, please refer to 24 | 25 | https://github.com/laurencium/causalinference/blob/master/docs/tex/vignette.pdf 26 | 27 | A blog dedicated to providing a more detailed walkthrough of *Causalinference* and the econometric theory behind it can be found at 28 | 29 | https://laurencewong.com/software/ 30 | 31 | Main Features 32 | ============= 33 | 34 | * Assessment of overlap in covariate distributions 35 | * Estimation of propensity score 36 | * Improvement of covariate balance through trimming 37 | * Subclassification on propensity score 38 | * Estimation of treatment effects via matching, blocking, weighting, and least squares 39 | 40 | Dependencies 41 | ============ 42 | 43 | * NumPy: 1.8.2 or higher 44 | * SciPy: 0.13.3 or higher 45 | 46 | Installation 47 | ============ 48 | 49 | *Causalinference* can be installed using ``pip``: :: 50 | 51 | $ pip install causalinference 52 | 53 | For help on setting up Pip, NumPy, and SciPy on Macs, check out this excellent `guide `_. 54 | 55 | Minimal Example 56 | =============== 57 | 58 | The following illustrates how to create an instance of CausalModel: :: 59 | 60 | >>> from causalinference import CausalModel 61 | >>> from causalinference.utils import random_data 62 | >>> Y, D, X = random_data() 63 | >>> causal = CausalModel(Y, D, X) 64 | 65 | Invoking ``help`` on ``causal`` at this point should return a comprehensive listing of all the causal analysis tools available in *Causalinference*. 
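The methods defined in ``causalinference/causal.py`` below (``est_propensity_s``, ``trim_s``, ``stratify_s``, ``est_via_blocking``, ``est_via_matching``) suggest a typical analysis sequence. The following continuation of the minimal example is an illustrative sketch assembled from those method signatures rather than an official tutorial, and the printed summaries are omitted: ::

    >>> print(causal.summary_stats)             # covariate balance diagnostics
    >>> causal.est_propensity_s()               # propensity score with covariate selection
    >>> causal.trim_s()                         # drop units with extreme scores to improve overlap
    >>> causal.stratify_s()                     # data-driven propensity-score blocks
    >>> causal.est_via_blocking()               # within-block regression estimates
    >>> causal.est_via_matching(bias_adj=True)  # bias-corrected nearest-neighbor matching
    >>> print(causal.estimates)

Each estimation method stores its results in ``causal.estimates``, keyed by estimator name (``'blocking'``, ``'matching'``, and so on), as implemented in ``causalinference/estimators/base.py``.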
66 | 67 | -------------------------------------------------------------------------------- /causalinference/__init__.py: -------------------------------------------------------------------------------- 1 | from .causal import CausalModel 2 | 3 | -------------------------------------------------------------------------------- /causalinference/causal.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from itertools import combinations_with_replacement 4 | 5 | from .core import Data, Summary, Propensity, PropensitySelect, Strata 6 | from .estimators import OLS, Blocking, Weighting, Matching, Estimators 7 | 8 | 9 | class CausalModel(object): 10 | 11 | """ 12 | Class that provides the main tools of Causal Inference. 13 | """ 14 | 15 | def __init__(self, Y, D, X): 16 | 17 | self.old_data = Data(Y, D, X) 18 | self.reset() 19 | 20 | 21 | def reset(self): 22 | 23 | """ 24 | Reinitializes data to original inputs, and drops any estimated 25 | results. 26 | """ 27 | 28 | Y, D, X = self.old_data['Y'], self.old_data['D'], self.old_data['X'] 29 | self.raw_data = Data(Y, D, X) 30 | self.summary_stats = Summary(self.raw_data) 31 | self.propensity = None 32 | self.cutoff = None 33 | self.blocks = None 34 | self.strata = None 35 | self.estimates = Estimators() 36 | 37 | 38 | def est_propensity(self, lin='all', qua=None): 39 | 40 | """ 41 | Estimates the propensity scores given list of covariates to 42 | include linearly or quadratically. 43 | 44 | The propensity score is the conditional probability of 45 | receiving the treatment given the observed covariates. 46 | Estimation is done via a logistic regression. 47 | 48 | Parameters 49 | ---------- 50 | lin: string or list, optional 51 | Column numbers (zero-based) of variables of 52 | the original covariate matrix X to include 53 | linearly. Defaults to the string 'all', which 54 | uses whole covariate matrix. 55 | qua: list, optional 56 | Tuples indicating which columns of the original 57 | covariate matrix to multiply and include. E.g., 58 | [(1,1), (2,3)] indicates squaring the 2nd column 59 | and including the product of the 3rd and 4th 60 | columns. Default is to not include any 61 | quadratic terms. 62 | """ 63 | 64 | lin_terms = parse_lin_terms(self.raw_data['K'], lin) 65 | qua_terms = parse_qua_terms(self.raw_data['K'], qua) 66 | 67 | self.propensity = Propensity(self.raw_data, lin_terms, qua_terms) 68 | self.raw_data._dict['pscore'] = self.propensity['fitted'] 69 | self._post_pscore_init() 70 | 71 | 72 | def est_propensity_s(self, lin_B=None, C_lin=1, C_qua=2.71): 73 | 74 | """ 75 | Estimates the propensity score with covariates selected using 76 | the algorithm suggested by [1]_. 77 | 78 | The propensity score is the conditional probability of 79 | receiving the treatment given the observed covariates. 80 | Estimation is done via a logistic regression. 81 | 82 | The covariate selection algorithm is based on a sequence 83 | of likelihood ratio tests. 84 | 85 | Parameters 86 | ---------- 87 | lin_B: list, optional 88 | Column numbers (zero-based) of variables of 89 | the original covariate matrix X to include 90 | linearly. Defaults to empty list, meaning 91 | every column of X is subjected to the 92 | selection algorithm. 93 | C_lin: scalar, optional 94 | Critical value used in likelihood ratio tests 95 | to decide whether candidate linear terms should 96 | be included. Defaults to 1 as in [1]_. 
97 | C_qua: scalar, optional 98 | Critical value used in likelihood ratio tests 99 | to decide whether candidate quadratic terms 100 | should be included. Defaults to 2.71 as in 101 | [1]_. 102 | 103 | References 104 | ---------- 105 | .. [1] Imbens, G. & Rubin, D. (2015). Causal Inference in 106 | Statistics, Social, and Biomedical Sciences: An 107 | Introduction. 108 | """ 109 | 110 | lin_basic = parse_lin_terms(self.raw_data['K'], lin_B) 111 | 112 | self.propensity = PropensitySelect(self.raw_data, lin_basic, 113 | C_lin, C_qua) 114 | self.raw_data._dict['pscore'] = self.propensity['fitted'] 115 | self._post_pscore_init() 116 | 117 | 118 | def trim(self): 119 | 120 | """ 121 | Trims data based on propensity score to create a subsample with 122 | better covariate balance. 123 | 124 | The default cutoff value is set to 0.1. To set a custom cutoff 125 | value, modify the object attribute named cutoff directly. 126 | 127 | This method should only be executed after the propensity score 128 | has been estimated. 129 | """ 130 | 131 | if 0 < self.cutoff <= 0.5: 132 | pscore = self.raw_data['pscore'] 133 | keep = (pscore >= self.cutoff) & (pscore <= 1-self.cutoff) 134 | Y_trimmed = self.raw_data['Y'][keep] 135 | D_trimmed = self.raw_data['D'][keep] 136 | X_trimmed = self.raw_data['X'][keep] 137 | self.raw_data = Data(Y_trimmed, D_trimmed, X_trimmed) 138 | self.raw_data._dict['pscore'] = pscore[keep] 139 | self.summary_stats = Summary(self.raw_data) 140 | self.strata = None 141 | self.estimates = Estimators() 142 | elif self.cutoff == 0: 143 | pass 144 | else: 145 | raise ValueError('Invalid cutoff.') 146 | 147 | 148 | def trim_s(self): 149 | 150 | """ 151 | Trims data based on propensity score using the cutoff 152 | selection algorithm suggested by [1]_. 153 | 154 | This method should only be executed after the propensity score 155 | has been estimated. 156 | 157 | References 158 | ---------- 159 | .. [1] Crump, R., Hotz, V., Imbens, G., & Mitnik, O. (2009). 160 | Dealing with Limited Overlap in Estimation of 161 | Average Treatment Effects. Biometrika, 96, 187-199. 162 | """ 163 | 164 | pscore = self.raw_data['pscore'] 165 | g = 1.0/(pscore*(1-pscore)) # 1 over Bernoulli variance 166 | 167 | self.cutoff = select_cutoff(g) 168 | self.trim() 169 | 170 | 171 | def stratify(self): 172 | 173 | """ 174 | Stratifies the sample based on propensity score. 175 | 176 | By default the sample is divided into five equal-sized bins. 177 | The number of bins can be set by modifying the object 178 | attribute named blocks. Alternatively, custom-sized bins can 179 | be created by setting blocks equal to a sorted list of numbers 180 | between 0 and 1 indicating the bin boundaries. 181 | 182 | This method should only be executed after the propensity score 183 | has been estimated. 
184 | """ 185 | 186 | Y, D, X = self.raw_data['Y'], self.raw_data['D'], self.raw_data['X'] 187 | pscore = self.raw_data['pscore'] 188 | 189 | if isinstance(self.blocks, int): 190 | blocks = split_equal_bins(pscore, self.blocks) 191 | else: 192 | blocks = self.blocks[:] # make a copy; should be sorted 193 | blocks[0] = 0 # avoids always dropping 1st unit 194 | 195 | def subset(p_low, p_high): 196 | return (p_low < pscore) & (pscore <= p_high) 197 | subsets = [subset(*ps) for ps in zip(blocks, blocks[1:])] 198 | strata = [CausalModel(Y[s], D[s], X[s]) for s in subsets] 199 | self.strata = Strata(strata, subsets, pscore) 200 | 201 | 202 | def stratify_s(self): 203 | 204 | """ 205 | Stratifies the sample based on propensity score using the 206 | bin selection procedure suggested by [1]_. 207 | 208 | The bin selection algorithm is based on a sequence of 209 | two-sample t tests performed on the log-odds ratio. 210 | 211 | This method should only be executed after the propensity score 212 | has been estimated. 213 | 214 | References 215 | ---------- 216 | .. [1] Imbens, G. & Rubin, D. (2015). Causal Inference in 217 | Statistics, Social, and Biomedical Sciences: An 218 | Introduction. 219 | """ 220 | 221 | pscore_order = self.raw_data['pscore'].argsort() 222 | pscore = self.raw_data['pscore'][pscore_order] 223 | D = self.raw_data['D'][pscore_order] 224 | logodds = np.log(pscore / (1-pscore)) 225 | K = self.raw_data['K'] 226 | 227 | blocks_uniq = set(select_blocks(pscore, logodds, D, K, 0, 1)) 228 | self.blocks = sorted(blocks_uniq) 229 | self.stratify() 230 | 231 | 232 | def est_via_ols(self, adj=2): 233 | 234 | """ 235 | Estimates average treatment effects using least squares. 236 | 237 | Parameters 238 | ---------- 239 | adj: int (0, 1, or 2) 240 | Indicates how covariate adjustments are to be 241 | performed. Set adj = 0 to not include any 242 | covariates. Set adj = 1 to include treatment 243 | indicator D and covariates X separately. Set 244 | adj = 2 to additionally include interaction 245 | terms between D and X. Defaults to 2. 246 | """ 247 | 248 | self.estimates['ols'] = OLS(self.raw_data, adj) 249 | 250 | 251 | def est_via_blocking(self, adj=1): 252 | 253 | """ 254 | Estimates average treatment effects using regression within 255 | blocks. 256 | 257 | This method should only be executed after the sample has been 258 | stratified. 259 | 260 | Parameters 261 | ---------- 262 | adj: int (0, 1, or 2) 263 | Indicates how covariate adjustments are to be 264 | performed for each within-bin regression. 265 | Set adj = 0 to not include any covariates. 266 | Set adj = 1 to include treatment indicator D 267 | and covariates X separately. Set adj = 2 to 268 | additionally include interaction terms between 269 | D and X. Defaults to 1. 270 | """ 271 | 272 | self.estimates['blocking'] = Blocking(self.strata, adj) 273 | 274 | 275 | def est_via_weighting(self): 276 | 277 | """ 278 | Estimates average treatment effects using doubly-robust 279 | version of the Horvitz-Thompson weighting estimator. 280 | """ 281 | 282 | self.estimates['weighting'] = Weighting(self.raw_data) 283 | 284 | 285 | def est_via_matching(self, weights='inv', matches=1, bias_adj=False): 286 | 287 | """ 288 | Estimates average treatment effects using nearest- 289 | neighborhood matching. 290 | 291 | Matching is done with replacement. Method supports multiple 292 | matching. Correcting bias that arise due to imperfect matches 293 | is also supported. For details on methodology, see [1]_. 
294 | 295 | Parameters 296 | ---------- 297 | weights: str or positive definite square matrix 298 | Specifies weighting matrix used in computing 299 | distance measures. Defaults to string 'inv', 300 | which does inverse variance weighting. String 301 | 'maha' gives the weighting matrix used in the 302 | Mahalanobis metric. 303 | matches: int 304 | Number of matches to use for each subject. 305 | bias_adj: bool 306 | Specifies whether bias adjustments should be 307 | attempted. 308 | 309 | References 310 | ---------- 311 | .. [1] Imbens, G. & Rubin, D. (2015). Causal Inference in 312 | Statistics, Social, and Biomedical Sciences: An 313 | Introduction. 314 | """ 315 | 316 | X, K = self.raw_data['X'], self.raw_data['K'] 317 | X_c, X_t = self.raw_data['X_c'], self.raw_data['X_t'] 318 | 319 | if weights == 'inv': 320 | W = 1/X.var(0) 321 | elif weights == 'maha': 322 | V_c = np.cov(X_c, rowvar=False, ddof=0) 323 | V_t = np.cov(X_t, rowvar=False, ddof=0) 324 | if K == 1: 325 | W = 1/np.array([[(V_c+V_t)/2]]) # matrix form 326 | else: 327 | W = np.linalg.inv((V_c+V_t)/2) 328 | else: 329 | W = weights 330 | 331 | self.estimates['matching'] = Matching(self.raw_data, W, 332 | matches, bias_adj) 333 | 334 | 335 | def _post_pscore_init(self): 336 | 337 | self.cutoff = 0.1 338 | self.blocks = 5 339 | 340 | 341 | def parse_lin_terms(K, lin): 342 | 343 | if lin is None: 344 | return [] 345 | elif lin == 'all': 346 | return range(K) 347 | else: 348 | return lin 349 | 350 | 351 | def parse_qua_terms(K, qua): 352 | 353 | if qua is None: 354 | return [] 355 | elif qua == 'all': 356 | return list(combinations_with_replacement(range(K), 2)) 357 | else: 358 | return qua 359 | 360 | 361 | def sumlessthan(g, sorted_g, cumsum): 362 | 363 | deduped_values = dict(zip(sorted_g, cumsum)) 364 | 365 | return np.array([deduped_values[x] for x in g]) 366 | 367 | 368 | def select_cutoff(g): 369 | 370 | if g.max() <= 2*g.mean(): 371 | cutoff = 0 372 | else: 373 | sorted_g = np.sort(g) 374 | cumsum_1 = range(1, len(g)+1) 375 | LHS = g * sumlessthan(g, sorted_g, cumsum_1) 376 | cumsum_g = np.cumsum(sorted_g) 377 | RHS = 2 * sumlessthan(g, sorted_g, cumsum_g) 378 | gamma = np.max(g[LHS <= RHS]) 379 | cutoff = 0.5 - np.sqrt(0.25 - 1./gamma) 380 | 381 | return cutoff 382 | 383 | 384 | def split_equal_bins(pscore, blocks): 385 | 386 | q = np.linspace(0, 100, blocks+1)[1:-1] # q as in qth centiles 387 | centiles = [np.percentile(pscore, x) for x in q] 388 | 389 | return [0] + centiles + [1] 390 | 391 | 392 | def calc_tstat(sample_c, sample_t): 393 | 394 | N_c = sample_c.shape[0] 395 | N_t = sample_t.shape[0] 396 | var_c = sample_c.var(ddof=1) 397 | var_t = sample_t.var(ddof=1) 398 | 399 | return (sample_t.mean()-sample_c.mean()) / np.sqrt(var_c/N_c+var_t/N_t) 400 | 401 | 402 | def calc_sample_sizes(D): 403 | 404 | N = D.shape[0] 405 | mid_index = N // 2 406 | 407 | Nleft = mid_index 408 | Nleft_t = D[:mid_index].sum() 409 | Nleft_c = Nleft - Nleft_t 410 | 411 | Nright = N - Nleft 412 | Nright_t = D[mid_index:].sum() 413 | Nright_c = Nright - Nright_t 414 | 415 | return (Nleft_c, Nleft_t, Nright_c, Nright_t) 416 | 417 | 418 | def select_blocks(pscore, logodds, D, K, p_low, p_high): 419 | 420 | scope = (pscore >= p_low) & (pscore <= p_high) 421 | c, t = (scope & (D==0)), (scope & (D==1)) 422 | 423 | Nleft_c, Nleft_t, Nright_c, Nright_t = calc_sample_sizes(D[scope]) 424 | if min(Nleft_c, Nleft_t, Nright_c, Nright_t) < K+1: 425 | return [p_low, p_high] 426 | 427 | tstat = calc_tstat(logodds[c], logodds[t]) 428 | if tstat <= 1.96: 
429 | return [p_low, p_high] 430 | 431 | low = pscore[scope][0] 432 | mid = pscore[scope][scope.sum() // 2] 433 | high = pscore[scope][-1] 434 | 435 | return select_blocks(pscore, logodds, D, K, low, mid) + \ 436 | select_blocks(pscore, logodds, D, K, mid, high) 437 | 438 | -------------------------------------------------------------------------------- /causalinference/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import Dict, Data 2 | from .summary import Summary 3 | from .propensity import Propensity, PropensitySelect 4 | from .strata import Strata 5 | 6 | -------------------------------------------------------------------------------- /causalinference/core/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Dict(object): 5 | 6 | """ 7 | Dictionary-mimicking class. 8 | """ 9 | 10 | def __getitem__(self, key): 11 | 12 | return self._dict[key] 13 | 14 | 15 | def __iter__(self): 16 | 17 | return iter(self._dict) 18 | 19 | 20 | def __repr__(self): 21 | 22 | return self._dict.__repr__() 23 | 24 | 25 | def keys(self): 26 | 27 | return self._dict.keys() 28 | 29 | 30 | def iteritems(self): 31 | 32 | return self._dict.iteritems() 33 | 34 | 35 | def get(self, key, default=None): 36 | 37 | return self._dict.get(key, default) 38 | 39 | 40 | class Data(Dict): 41 | 42 | """ 43 | Dictionary-like class containing basic data. 44 | """ 45 | 46 | def __init__(self, outcome, treatment, covariates): 47 | 48 | Y, D, X = preprocess(outcome, treatment, covariates) 49 | self._dict = dict() 50 | self._dict['Y'] = Y 51 | self._dict['D'] = D 52 | self._dict['X'] = X 53 | self._dict['N'], self._dict['K'] = X.shape 54 | self._dict['controls'] = (D==0) 55 | self._dict['treated'] = (D==1) 56 | self._dict['Y_c'] = Y[self._dict['controls']] 57 | self._dict['Y_t'] = Y[self._dict['treated']] 58 | self._dict['X_c'] = X[self._dict['controls']] 59 | self._dict['X_t'] = X[self._dict['treated']] 60 | self._dict['N_t'] = D.sum() 61 | self._dict['N_c'] = self._dict['N'] - self._dict['N_t'] 62 | if self._dict['K']+1 > self._dict['N_c']: 63 | raise ValueError('Too few control units: N_c < K+1') 64 | if self._dict['K']+1 > self._dict['N_t']: 65 | raise ValueError('Too few treated units: N_t < K+1') 66 | 67 | 68 | def preprocess(Y, D, X): 69 | 70 | if Y.shape[0] == D.shape[0] == X.shape[0]: 71 | N = Y.shape[0] 72 | else: 73 | raise IndexError('Input data have different number of rows') 74 | 75 | if Y.shape != (N, ): 76 | Y.shape = (N, ) 77 | if D.shape != (N, ): 78 | D.shape = (N, ) 79 | if D.dtype != 'int': 80 | D = D.astype(int) 81 | if X.shape == (N, ): 82 | X.shape = (N, 1) 83 | 84 | return (Y, D, X) 85 | 86 | -------------------------------------------------------------------------------- /causalinference/core/propensity.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from scipy.optimize import fmin_bfgs 4 | from itertools import combinations_with_replacement 5 | 6 | import causalinference.utils.tools as tools 7 | from .data import Dict 8 | 9 | 10 | class Propensity(Dict): 11 | 12 | """ 13 | Dictionary-like class containing propensity score data. 
14 | 15 | Propensity score related data includes estimated logistic regression 16 | coefficients, maximized log-likelihood, predicted propensity scores, 17 | and lists of the linear and quadratic terms that are included in the 18 | logistic regression. 19 | """ 20 | 21 | def __init__(self, data, lin, qua): 22 | 23 | Z = form_matrix(data['X'], lin, qua) 24 | Z_c, Z_t = Z[data['controls']], Z[data['treated']] 25 | beta = calc_coef(Z_c, Z_t) 26 | 27 | self._data = data 28 | self._dict = dict() 29 | self._dict['lin'], self._dict['qua'] = lin, qua 30 | self._dict['coef'] = beta 31 | self._dict['loglike'] = -neg_loglike(beta, Z_c, Z_t) 32 | self._dict['fitted'] = sigmoid(Z.dot(beta)) 33 | self._dict['se'] = calc_se(Z, self._dict['fitted']) 34 | 35 | 36 | def __str__(self): 37 | 38 | table_width = 80 39 | 40 | coefs = self._dict['coef'] 41 | ses = self._dict['se'] 42 | 43 | output = '\n' 44 | output += 'Estimated Parameters of Propensity Score\n\n' 45 | 46 | entries1 = ['', 'Coef.', 'S.e.', 'z', 'P>|z|', 47 | '[95% Conf. int.]'] 48 | entry_types1 = ['string']*6 49 | col_spans1 = [1]*5 + [2] 50 | output += tools.add_row(entries1, entry_types1, 51 | col_spans1, table_width) 52 | output += tools.add_line(table_width) 53 | 54 | entries2 = tools.gen_reg_entries('Intercept', coefs[0], ses[0]) 55 | entry_types2 = ['string'] + ['float']*6 56 | col_spans2 = [1]*7 57 | output += tools.add_row(entries2, entry_types2, 58 | col_spans2, table_width) 59 | 60 | lin = self._dict['lin'] 61 | for (lin_term, coef, se) in zip(lin, coefs[1:], ses[1:]): 62 | entries3 = tools.gen_reg_entries('X'+str(lin_term), 63 | coef, se) 64 | output += tools.add_row(entries3, entry_types2, 65 | col_spans2, table_width) 66 | 67 | qua = self._dict['qua'] 68 | lin_num = len(lin)+1 # including intercept 69 | for (qua_term, coef, se) in zip(qua, coefs[lin_num:], 70 | ses[lin_num:]): 71 | name = 'X'+str(qua_term[0])+'*X'+str(qua_term[1]) 72 | entries4 = tools.gen_reg_entries(name, coef, se) 73 | output += tools.add_row(entries4, entry_types2, 74 | col_spans2, table_width) 75 | 76 | return output 77 | 78 | 79 | class PropensitySelect(Propensity): 80 | 81 | """ 82 | Dictionary-like class containing propensity score data. 83 | 84 | Propensity score related data includes estimated logistic regression 85 | coefficients, maximized log-likelihood, predicted propensity scores, 86 | and lists of the linear and quadratic terms that are included in the 87 | logistic regression. 
88 | """ 89 | 90 | def __init__(self, data, lin_B, C_lin, C_qua): 91 | 92 | X_c, X_t = data['X_c'], data['X_t'] 93 | lin = select_lin_terms(X_c, X_t, lin_B, C_lin) 94 | qua = select_qua_terms(X_c, X_t, lin, C_qua) 95 | 96 | super(PropensitySelect, self).__init__(data, lin, qua) 97 | 98 | 99 | def form_matrix(X, lin, qua): 100 | 101 | N, K = X.shape 102 | 103 | mat = np.empty((N, 1+len(lin)+len(qua))) 104 | mat[:, 0] = 1 # constant term 105 | 106 | current_col = 1 107 | if lin: 108 | mat[:, current_col:current_col+len(lin)] = X[:, lin] 109 | current_col += len(lin) 110 | for term in qua: # qua is a list of tuples of column numbers 111 | mat[:, current_col] = X[:, term[0]] * X[:, term[1]] 112 | current_col += 1 113 | 114 | return mat 115 | 116 | 117 | def sigmoid(x, top_threshold=100, bottom_threshold=-100): 118 | 119 | high_x = (x >= top_threshold) 120 | low_x = (x <= bottom_threshold) 121 | mid_x = ~(high_x | low_x) 122 | 123 | values = np.empty(x.shape[0]) 124 | values[high_x] = 1.0 125 | values[low_x] = 0.0 126 | values[mid_x] = 1/(1+np.exp(-x[mid_x])) 127 | 128 | return values 129 | 130 | 131 | def log1exp(x, top_threshold=100, bottom_threshold=-100): 132 | 133 | high_x = (x >= top_threshold) 134 | low_x = (x <= bottom_threshold) 135 | mid_x = ~(high_x | low_x) 136 | 137 | values = np.empty(x.shape[0]) 138 | values[high_x] = 0.0 139 | values[low_x] = -x[low_x] 140 | values[mid_x] = np.log(1 + np.exp(-x[mid_x])) 141 | 142 | return values 143 | 144 | 145 | def neg_loglike(beta, X_c, X_t): 146 | 147 | return log1exp(X_t.dot(beta)).sum() + log1exp(-X_c.dot(beta)).sum() 148 | 149 | 150 | def neg_gradient(beta, X_c, X_t): 151 | 152 | return (sigmoid(X_c.dot(beta))*X_c.T).sum(1) - \ 153 | (sigmoid(-X_t.dot(beta))*X_t.T).sum(1) 154 | 155 | 156 | def calc_coef(X_c, X_t): 157 | 158 | K = X_c.shape[1] 159 | 160 | neg_ll = lambda b: neg_loglike(b, X_c, X_t) 161 | neg_grad = lambda b: neg_gradient(b, X_c, X_t) 162 | 163 | logit = fmin_bfgs(neg_ll, np.zeros(K), neg_grad, 164 | full_output=True, disp=False) 165 | 166 | return logit[0] 167 | 168 | 169 | def calc_se(X, phat): 170 | 171 | H = np.dot(phat*(1-phat)*X.T, X) 172 | 173 | return np.sqrt(np.diag(np.linalg.inv(H))) 174 | 175 | 176 | def get_excluded_lin(K, included): 177 | 178 | included_set = set(included) 179 | 180 | return [x for x in range(K) if x not in included_set] 181 | 182 | 183 | def get_excluded_qua(lin, included): 184 | 185 | whole_set = list(combinations_with_replacement(lin, 2)) 186 | included_set = set(included) 187 | 188 | return [x for x in whole_set if x not in included_set] 189 | 190 | 191 | def calc_loglike(X_c, X_t, lin, qua): 192 | 193 | Z_c = form_matrix(X_c, lin, qua) 194 | Z_t = form_matrix(X_t, lin, qua) 195 | beta = calc_coef(Z_c, Z_t) 196 | 197 | return -neg_loglike(beta, Z_c, Z_t) 198 | 199 | 200 | def select_lin(X_c, X_t, lin_B, C_lin): 201 | 202 | # Selects, through a sequence of likelihood ratio tests, the 203 | # variables that should be included linearly in propensity 204 | # score estimation. 
205 | 206 | K = X_c.shape[1] 207 | excluded = get_excluded_lin(K, lin_B) 208 | if excluded == []: 209 | return lin_B 210 | 211 | ll_null = calc_loglike(X_c, X_t, lin_B, []) 212 | 213 | def lr_stat_lin(lin_term): 214 | ll_alt = calc_loglike(X_c, X_t, lin_B+[lin_term], []) 215 | return 2 * (ll_alt - ll_null) 216 | 217 | lr_stats = np.array([lr_stat_lin(term) for term in excluded]) 218 | argmax_lr = lr_stats.argmax() 219 | 220 | if lr_stats[argmax_lr] < C_lin: 221 | return lin_B 222 | else: 223 | new_term = [excluded[argmax_lr]] 224 | return select_lin(X_c, X_t, lin_B+new_term, C_lin) 225 | 226 | 227 | def select_lin_terms(X_c, X_t, lin_B, C_lin): 228 | 229 | # Mostly a wrapper around function select_lin to handle cases that 230 | # require little computation. 231 | 232 | if C_lin <= 0: 233 | K = X_c.shape[1] 234 | return lin_B + get_excluded_lin(K, lin_B) 235 | elif C_lin == np.inf: 236 | return lin_B 237 | else: 238 | return select_lin(X_c, X_t, lin_B, C_lin) 239 | 240 | 241 | def select_qua(X_c, X_t, lin, qua_B, C_qua): 242 | 243 | # Selects, through a sequence of likelihood ratio tests, the 244 | # variables that should be included quadratically in propensity 245 | # score estimation. 246 | 247 | excluded = get_excluded_qua(lin, qua_B) 248 | if excluded == []: 249 | return qua_B 250 | 251 | ll_null = calc_loglike(X_c, X_t, lin, qua_B) 252 | 253 | def lr_stat_qua(qua_term): 254 | ll_alt = calc_loglike(X_c, X_t, lin, qua_B+[qua_term]) 255 | return 2 * (ll_alt - ll_null) 256 | 257 | lr_stats = np.array([lr_stat_qua(term) for term in excluded]) 258 | argmax_lr = lr_stats.argmax() 259 | 260 | if lr_stats[argmax_lr] < C_qua: 261 | return qua_B 262 | else: 263 | new_term = [excluded[argmax_lr]] 264 | return select_qua(X_c, X_t, lin, qua_B+new_term, C_qua) 265 | 266 | 267 | def select_qua_terms(X_c, X_t, lin, C_qua): 268 | 269 | # Mostly a wrapper around function select_qua to handle cases that 270 | # require little computation. 271 | 272 | if lin == []: 273 | return [] 274 | if C_qua <= 0: 275 | return get_excluded_qua(lin, []) 276 | elif C_qua == np.inf: 277 | return [] 278 | else: 279 | return select_qua(X_c, X_t, lin, [], C_qua) 280 | 281 | -------------------------------------------------------------------------------- /causalinference/core/strata.py: -------------------------------------------------------------------------------- 1 | import causalinference.utils.tools as tools 2 | 3 | 4 | class Strata(object): 5 | 6 | """ 7 | List-like object containing the stratified propensity bins. 8 | """ 9 | 10 | def __init__(self, strata, subsets, pscore): 11 | 12 | self._strata = strata 13 | for stratum, subset in zip(self._strata, subsets): 14 | pscore_sub = pscore[subset] 15 | stratum.raw_data._dict['pscore'] = pscore_sub 16 | D_sub = stratum.raw_data['D'] 17 | pscore_sub_c = pscore_sub[D_sub==0] 18 | pscore_sub_t = pscore_sub[D_sub==1] 19 | stratum.summary_stats._summarize_pscore(pscore_sub_c, 20 | pscore_sub_t) 21 | 22 | 23 | def __len__(self): 24 | 25 | return len(self._strata) 26 | 27 | 28 | def __getitem__(self, index): 29 | 30 | return self._strata[index] 31 | 32 | 33 | def __str__(self): 34 | 35 | table_width = 80 36 | 37 | output = '\n' 38 | output += 'Stratification Summary\n\n' 39 | 40 | entries1 = ['', 'Propensity Score', 'Sample Size', 41 | 'Ave. 
Propensity', 'Outcome'] 42 | entry_types1 = ['string']*5 43 | col_spans1 = [1, 2, 2, 2, 1] 44 | output += tools.add_row(entries1, entry_types1, 45 | col_spans1, table_width) 46 | 47 | entries2 = ['Stratum', 'Min.', 'Max.', 'Controls', 'Treated', 48 | 'Controls', 'Treated', 'Raw-diff'] 49 | entry_types2 = ['string']*8 50 | col_spans2 = [1]*8 51 | output += tools.add_row(entries2, entry_types2, 52 | col_spans2, table_width) 53 | output += tools.add_line(table_width) 54 | 55 | strata = self._strata 56 | entry_types3 = ['integer', 'float', 'float', 'integer', 57 | 'integer', 'float', 'float', 'float'] 58 | for i in range(len(strata)): 59 | summary = strata[i].summary_stats 60 | N_c, N_t = summary['N_c'], summary['N_t'] 61 | p_min, p_max = summary['p_min'], summary['p_max'] 62 | p_c_mean = summary['p_c_mean'] 63 | p_t_mean = summary['p_t_mean'] 64 | within = summary['rdiff'] 65 | entries3 = [i+1, p_min, p_max, N_c, N_t, 66 | p_c_mean, p_t_mean, within] 67 | output += tools.add_row(entries3, entry_types3, 68 | col_spans2, table_width) 69 | 70 | return output 71 | 72 | -------------------------------------------------------------------------------- /causalinference/core/summary.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | import causalinference.utils.tools as tools 5 | from .data import Dict 6 | 7 | 8 | class Summary(Dict): 9 | 10 | """ 11 | Dictionary-like class containing summary statistics for input data. 12 | 13 | One of the summary statistics is the normalized difference between 14 | covariates. Large values indicate that simple linear adjustment methods 15 | may not be adequate for removing biases that are associated with 16 | differences in covariates. 17 | """ 18 | 19 | def __init__(self, data): 20 | 21 | self._dict = dict() 22 | 23 | self._dict['N'], self._dict['K'] = data['N'], data['K'] 24 | self._dict['N_c'], self._dict['N_t'] = data['N_c'], data['N_t'] 25 | self._dict['Y_c_mean'] = data['Y_c'].mean() 26 | self._dict['Y_t_mean'] = data['Y_t'].mean() 27 | self._dict['Y_c_sd'] = np.sqrt(data['Y_c'].var(ddof=1)) 28 | self._dict['Y_t_sd'] = np.sqrt(data['Y_t'].var(ddof=1)) 29 | self._dict['rdiff'] = self['Y_t_mean'] - self['Y_c_mean'] 30 | self._dict['X_c_mean'] = data['X_c'].mean(0) 31 | self._dict['X_t_mean'] = data['X_t'].mean(0) 32 | self._dict['X_c_sd'] = np.sqrt(data['X_c'].var(0, ddof=1)) 33 | self._dict['X_t_sd'] = np.sqrt(data['X_t'].var(0, ddof=1)) 34 | self._dict['ndiff'] = calc_ndiff(self['X_c_mean'], 35 | self['X_t_mean'], 36 | self['X_c_sd'], 37 | self['X_t_sd']) 38 | 39 | 40 | def _summarize_pscore(self, pscore_c, pscore_t): 41 | 42 | """ 43 | Called by Strata class during initialization. 
44 | """ 45 | 46 | self._dict['p_min'] = min(pscore_c.min(), pscore_t.min()) 47 | self._dict['p_max'] = max(pscore_c.max(), pscore_t.max()) 48 | self._dict['p_c_mean'] = pscore_c.mean() 49 | self._dict['p_t_mean'] = pscore_t.mean() 50 | 51 | 52 | def __str__(self): 53 | 54 | table_width = 80 55 | 56 | N_c, N_t, K = self['N_c'], self['N_t'], self['K'] 57 | Y_c_mean, Y_t_mean = self['Y_c_mean'], self['Y_t_mean'] 58 | Y_c_sd, Y_t_sd = self['Y_c_sd'], self['Y_t_sd'] 59 | X_c_mean, X_t_mean = self['X_c_mean'], self['X_t_mean'] 60 | X_c_sd, X_t_sd = self['X_c_sd'], self['X_t_sd'] 61 | rdiff, ndiff = self['rdiff'], self['ndiff'] 62 | varnames = ['X'+str(i) for i in range(K)] 63 | 64 | output = '\n' 65 | output += 'Summary Statistics\n\n' 66 | 67 | entries1 = ['', 'Controls (N_c='+str(N_c)+')', 68 | 'Treated (N_t='+str(N_t)+')', ''] 69 | entry_types1 = ['string']*4 70 | col_spans1 = [1, 2, 2, 1] 71 | output += tools.add_row(entries1, entry_types1, 72 | col_spans1, table_width) 73 | 74 | entries2 = ['Variable', 'Mean', 'S.d.', 75 | 'Mean', 'S.d.', 'Raw-diff'] 76 | entry_types2 = ['string']*6 77 | col_spans2 = [1]*6 78 | output += tools.add_row(entries2, entry_types2, 79 | col_spans2, table_width) 80 | output += tools.add_line(table_width) 81 | 82 | entries3 = ['Y', Y_c_mean, Y_c_sd, Y_t_mean, Y_t_sd, rdiff] 83 | entry_types3 = ['string'] + ['float']*5 84 | col_spans3 = [1]*6 85 | output += tools.add_row(entries3, entry_types3, 86 | col_spans3, table_width) 87 | 88 | output += '\n' 89 | output += tools.add_row(entries1, entry_types1, 90 | col_spans1, table_width) 91 | 92 | entries4 = ['Variable', 'Mean', 'S.d.', 93 | 'Mean', 'S.d.', 'Nor-diff'] 94 | output += tools.add_row(entries4, entry_types2, 95 | col_spans2, table_width) 96 | output += tools.add_line(table_width) 97 | 98 | entry_types5 = ['string'] + ['float']*5 99 | col_spans5 = [1]*6 100 | for entries5 in zip(varnames, X_c_mean, X_c_sd, 101 | X_t_mean, X_t_sd, ndiff): 102 | output += tools.add_row(entries5, entry_types5, 103 | col_spans5, table_width) 104 | 105 | return output 106 | 107 | 108 | def calc_ndiff(mean_c, mean_t, sd_c, sd_t): 109 | 110 | return (mean_t-mean_c) / np.sqrt((sd_c**2+sd_t**2)/2) 111 | 112 | -------------------------------------------------------------------------------- /causalinference/estimators/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Estimators 2 | from .ols import OLS 3 | from .blocking import Blocking 4 | from .weighting import Weighting 5 | from .matching import Matching 6 | 7 | -------------------------------------------------------------------------------- /causalinference/estimators/base.py: -------------------------------------------------------------------------------- 1 | import causalinference.utils.tools as tools 2 | from ..core import Dict 3 | 4 | 5 | class Estimator(Dict): 6 | 7 | """ 8 | Dictionary-like class containing treatment effect estimates. 9 | """ 10 | 11 | def __str__(self): 12 | 13 | table_width = 80 14 | 15 | names = ['ate', 'atc', 'att'] 16 | coefs = [self[name] for name in names if name in self.keys()] 17 | ses = [self[name+'_se'] for name in names if name+'_se' in self.keys()] 18 | 19 | output = '\n' 20 | output += 'Treatment Effect Estimates: ' + self._method + '\n\n' 21 | 22 | entries1 = ['', 'Est.', 'S.e.', 'z', 'P>|z|', 23 | '[95% Conf. 
int.]'] 24 | entry_types1 = ['string']*6 25 | col_spans1 = [1]*5 + [2] 26 | output += tools.add_row(entries1, entry_types1, 27 | col_spans1, table_width) 28 | output += tools.add_line(table_width) 29 | 30 | entry_types2 = ['string'] + ['float']*6 31 | col_spans2 = [1]*7 32 | for (name, coef, se) in zip(names, coefs, ses): 33 | entries2 = tools.gen_reg_entries(name.upper(), coef, se) 34 | output += tools.add_row(entries2, entry_types2, 35 | col_spans2, table_width) 36 | 37 | return output 38 | 39 | 40 | class Estimators(Dict): 41 | 42 | """ 43 | Dictionary-like class containing treatment effect estimates for each 44 | estimator used. 45 | """ 46 | 47 | def __init__(self): 48 | 49 | self._dict = {} 50 | 51 | 52 | def __setitem__(self, key, item): 53 | 54 | self._dict[key] = item 55 | 56 | 57 | def __str__(self): 58 | 59 | output = '' 60 | for method in self.keys(): 61 | output += self[method].__str__() 62 | 63 | return output 64 | 65 | -------------------------------------------------------------------------------- /causalinference/estimators/blocking.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from .base import Estimator 5 | 6 | 7 | class Blocking(Estimator): 8 | 9 | """ 10 | Dictionary-like class containing treatment effect estimates. 11 | """ 12 | 13 | def __init__(self, strata, adj): 14 | 15 | self._method = 'Blocking' 16 | for s in strata: 17 | s.est_via_ols(adj) 18 | 19 | Ns = [s.raw_data['N'] for s in strata] 20 | N_cs = [s.raw_data['N_c'] for s in strata] 21 | N_ts = [s.raw_data['N_t'] for s in strata] 22 | 23 | ates = [s.estimates['ols']['ate'] for s in strata] 24 | ate_ses = [s.estimates['ols']['ate_se'] for s in strata] 25 | if adj <= 1: 26 | atcs, atts = ates, ates 27 | atc_ses, att_ses = ate_ses, ate_ses 28 | else: 29 | atcs = [s.estimates['ols']['atc'] for s in strata] 30 | atts = [s.estimates['ols']['att'] for s in strata] 31 | atc_ses = [s.estimates['ols']['atc_se'] for s in strata] 32 | att_ses = [s.estimates['ols']['att_se'] for s in strata] 33 | 34 | self._dict = dict() 35 | self._dict['ate'] = calc_atx(ates, Ns) 36 | self._dict['atc'] = calc_atx(atcs, N_cs) 37 | self._dict['att'] = calc_atx(atts, N_ts) 38 | 39 | self._dict['ate_se'] = calc_atx_se(ate_ses, Ns) 40 | self._dict['atc_se'] = calc_atx_se(atc_ses, N_cs) 41 | self._dict['att_se'] = calc_atx_se(att_ses, N_ts) 42 | 43 | 44 | def calc_atx(atxs, Ns): 45 | 46 | N = sum(Ns) 47 | 48 | return np.sum(np.array(atxs) * np.array(Ns)) / N 49 | 50 | 51 | def calc_atx_se(atx_ses, Ns): 52 | 53 | N = sum(Ns) 54 | var = np.sum(np.array(atx_ses)**2 * np.array(Ns)**2) / N**2 55 | 56 | return np.sqrt(var) 57 | 58 | -------------------------------------------------------------------------------- /causalinference/estimators/matching.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from itertools import chain 4 | from functools import reduce 5 | 6 | from .base import Estimator 7 | 8 | 9 | class Matching(Estimator): 10 | 11 | """ 12 | Dictionary-like class containing treatment effect estimates. Standard 13 | errors are only computed when needed. 
14 | """ 15 | 16 | def __init__(self, data, W, m, bias_adj): 17 | 18 | self._method = 'Matching' 19 | N, N_c, N_t = data['N'], data['N_c'], data['N_t'] 20 | Y_c, Y_t = data['Y_c'], data['Y_t'] 21 | X_c, X_t = data['X_c'], data['X_t'] 22 | 23 | matches_c = [match(X_i, X_t, W, m) for X_i in X_c] 24 | matches_t = [match(X_i, X_c, W, m) for X_i in X_t] 25 | Yhat_c = np.array([Y_t[idx].mean() for idx in matches_c]) 26 | Yhat_t = np.array([Y_c[idx].mean() for idx in matches_t]) 27 | ITT_c = Yhat_c - Y_c 28 | ITT_t = Y_t - Yhat_t 29 | 30 | if bias_adj: 31 | bias_coefs_c = bias_coefs(matches_c, Y_t, X_t) 32 | bias_coefs_t = bias_coefs(matches_t, Y_c, X_c) 33 | bias_c = bias(X_c, X_t, matches_c, bias_coefs_c) 34 | bias_t = bias(X_t, X_c, matches_t, bias_coefs_t) 35 | ITT_c = ITT_c - bias_c 36 | ITT_t = ITT_t + bias_t 37 | 38 | self._dict = dict() 39 | self._dict['atc'] = ITT_c.mean() 40 | self._dict['att'] = ITT_t.mean() 41 | self._dict['ate'] = (N_c/N)*self['atc'] + (N_t/N)*self['att'] 42 | 43 | scaled_counts_c = scaled_counts(N_c, matches_t) 44 | scaled_counts_t = scaled_counts(N_t, matches_c) 45 | vars_c = np.repeat(ITT_c.var(), N_c) # conservative 46 | vars_t = np.repeat(ITT_t.var(), N_t) # conservative 47 | self._dict['atc_se'] = calc_atc_se(vars_c, vars_t, scaled_counts_t) 48 | self._dict['att_se'] = calc_att_se(vars_c, vars_t, scaled_counts_c) 49 | self._dict['ate_se'] = calc_ate_se(vars_c, vars_t, 50 | scaled_counts_c, 51 | scaled_counts_t) 52 | 53 | 54 | def norm(X_i, X_m, W): 55 | 56 | dX = X_m - X_i 57 | if W.ndim == 1: 58 | return (dX**2 * W).sum(1) 59 | else: 60 | return (dX.dot(W)*dX).sum(1) 61 | 62 | 63 | def smallestm(d, m): 64 | 65 | # Finds indices of the smallest m numbers in an array. Tied values are 66 | # included as well, so number of returned indices can be greater than m. 67 | 68 | # partition around (m+1)th order stat 69 | par_idx = np.argpartition(d, m) 70 | 71 | if d[par_idx[:m]].max() < d[par_idx[m]]: # m < (m+1)th 72 | return par_idx[:m] 73 | elif d[par_idx[m]] < d[par_idx[m+1:]].min(): # m+1 < (m+2)th 74 | return par_idx[:m+1] 75 | else: # mth = (m+1)th = (m+2)th, so increment and recurse 76 | return smallestm(d, m+2) 77 | 78 | 79 | def match(X_i, X_m, W, m): 80 | 81 | d = norm(X_i, X_m, W) 82 | 83 | return smallestm(d, m) 84 | 85 | 86 | def bias_coefs(matches, Y_m, X_m): 87 | 88 | # Computes OLS coefficient in bias correction regression. Constructs 89 | # data for regression by including (possibly multiple times) every 90 | # observation that has appeared in the matched sample. 91 | 92 | flat_idx = reduce(lambda x,y: np.concatenate((x,y)), matches) 93 | N, K = len(flat_idx), X_m.shape[1] 94 | 95 | Y = Y_m[flat_idx] 96 | X = np.empty((N, K+1)) 97 | X[:, 0] = 1 # intercept term 98 | X[:, 1:] = X_m[flat_idx] 99 | 100 | return np.linalg.lstsq(X, Y)[0][1:] # don't need intercept coef 101 | 102 | 103 | def bias(X, X_m, matches, coefs): 104 | 105 | # Computes bias correction term, which is approximated by the dot 106 | # product of the matching discrepancy (i.e., X-X_matched) and the 107 | # coefficients from the bias correction regression. 108 | 109 | X_m_mean = [X_m[idx].mean(0) for idx in matches] 110 | bias_list = [(X_j-X_i).dot(coefs) for X_i,X_j in zip(X, X_m_mean)] 111 | 112 | return np.array(bias_list) 113 | 114 | 115 | def scaled_counts(N, matches): 116 | 117 | # Counts the number of times each subject has appeared as a match. In 118 | # the case of multiple matches, each subject only gets partial credit. 
119 | 120 | s_counts = np.zeros(N) 121 | 122 | for matches_i in matches: 123 | scale = 1 / len(matches_i) 124 | for match in matches_i: 125 | s_counts[match] += scale 126 | 127 | return s_counts 128 | 129 | 130 | def calc_atx_var(vars_c, vars_t, weights_c, weights_t): 131 | 132 | N_c, N_t = len(vars_c), len(vars_t) 133 | summands_c = weights_c**2 * vars_c 134 | summands_t = weights_t**2 * vars_t 135 | 136 | return summands_t.sum()/N_t**2 + summands_c.sum()/N_c**2 137 | 138 | 139 | def calc_atc_se(vars_c, vars_t, scaled_counts_t): 140 | 141 | N_c, N_t = len(vars_c), len(vars_t) 142 | weights_c = np.ones(N_c) 143 | weights_t = (N_t/N_c) * scaled_counts_t 144 | 145 | var = calc_atx_var(vars_c, vars_t, weights_c, weights_t) 146 | 147 | return np.sqrt(var) 148 | 149 | 150 | def calc_att_se(vars_c, vars_t, scaled_counts_c): 151 | 152 | N_c, N_t = len(vars_c), len(vars_t) 153 | weights_c = (N_c/N_t) * scaled_counts_c 154 | weights_t = np.ones(N_t) 155 | 156 | var = calc_atx_var(vars_c, vars_t, weights_c, weights_t) 157 | 158 | return np.sqrt(var) 159 | 160 | 161 | def calc_ate_se(vars_c, vars_t, scaled_counts_c, scaled_counts_t): 162 | 163 | N_c, N_t = len(vars_c), len(vars_t) 164 | N = N_c + N_t 165 | weights_c = (N_c/N)*(1+scaled_counts_c) 166 | weights_t = (N_t/N)*(1+scaled_counts_t) 167 | 168 | var = calc_atx_var(vars_c, vars_t, weights_c, weights_t) 169 | 170 | return np.sqrt(var) 171 | 172 | -------------------------------------------------------------------------------- /causalinference/estimators/ols.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | from .base import Estimator 6 | 7 | 8 | class OLS(Estimator): 9 | 10 | """ 11 | Dictionary-like class containing treatment effect estimates. 
12 | """ 13 | 14 | def __init__(self, data, adj): 15 | 16 | self._method = 'OLS' 17 | Y, D, X = data['Y'], data['D'], data['X'] 18 | X_c, X_t = data['X_c'], data['X_t'] 19 | 20 | Z = form_matrix(D, X, adj) 21 | olscoef = np.linalg.lstsq(Z, Y)[0] 22 | u = Y - Z.dot(olscoef) 23 | cov = calc_cov(Z, u) 24 | 25 | self._dict = dict() 26 | self._dict['ate'] = calc_ate(olscoef) 27 | self._dict['ate_se'] = calc_ate_se(cov) 28 | 29 | if adj == 2: 30 | Xmean = X.mean(0) 31 | meandiff_c = X_c.mean(0) - Xmean 32 | meandiff_t = X_t.mean(0) - Xmean 33 | self._dict['atc'] = calc_atx(olscoef, meandiff_c) 34 | self._dict['att'] = calc_atx(olscoef, meandiff_t) 35 | self._dict['atc_se'] = calc_atx_se(cov, meandiff_c) 36 | self._dict['att_se'] = calc_atx_se(cov, meandiff_t) 37 | 38 | 39 | def form_matrix(D, X, adj): 40 | 41 | N, K = X.shape 42 | 43 | if adj == 0: 44 | cols = 2 45 | elif adj == 1: 46 | cols = 2+K 47 | else: 48 | cols = 2+2*K 49 | 50 | Z = np.empty((N, cols)) 51 | Z[:, 0] = 1 # intercept term 52 | Z[:, 1] = D 53 | if adj >= 1: 54 | dX = X - X.mean(0) 55 | Z[:, 2:2+K] = dX 56 | if adj == 2: 57 | Z[:, 2+K:] = D[:, None] * dX 58 | 59 | return Z 60 | 61 | 62 | def calc_ate(olscoef): 63 | 64 | return olscoef[1] # coef of treatment variable 65 | 66 | 67 | def calc_atx(olscoef, meandiff): 68 | 69 | K = (len(olscoef)-2) // 2 70 | 71 | return olscoef[1] + np.dot(meandiff, olscoef[2+K:]) 72 | 73 | 74 | def calc_cov(Z, u): 75 | 76 | A = np.linalg.inv(np.dot(Z.T, Z)) 77 | B = np.dot(u[:, None]*Z, A) 78 | 79 | return np.dot(B.T, B) 80 | 81 | 82 | def submatrix(cov): 83 | 84 | K = (cov.shape[0]-2) // 2 85 | submat = np.empty((1+K, 1+K)) 86 | submat[0,0] = cov[1,1] 87 | submat[0,1:] = cov[1,2+K:] 88 | submat[1:,0] = cov[2+K:,1] 89 | submat[1:,1:] = cov[2+K:, 2+K:] 90 | 91 | return submat 92 | 93 | 94 | def calc_ate_se(cov): 95 | 96 | return np.sqrt(cov[1,1]) 97 | 98 | 99 | def calc_atx_se(cov, meandiff): 100 | 101 | a = np.concatenate((np.array([1]), meandiff)) 102 | 103 | return np.sqrt(a.dot(submatrix(cov)).dot(a)) 104 | 105 | -------------------------------------------------------------------------------- /causalinference/estimators/weighting.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | from .base import Estimator 5 | from .ols import calc_cov, calc_ate, calc_ate_se 6 | 7 | 8 | class Weighting(Estimator): 9 | 10 | """ 11 | Dictionary-like class containing treatment effect estimates. 
12 | """ 13 | 14 | def __init__(self, data): 15 | 16 | self._method = 'Weighting' 17 | Y, D, X = data['Y'], data['D'], data['X'] 18 | pscore = data['pscore'] 19 | 20 | weights = calc_weights(pscore, D) 21 | Y_w, Z_w = weigh_data(Y, D, X, weights) 22 | 23 | wlscoef = np.linalg.lstsq(Z_w, Y_w)[0] 24 | u_w = Y_w - Z_w.dot(wlscoef) 25 | cov = calc_cov(Z_w, u_w) 26 | 27 | self._dict = dict() 28 | self._dict['ate'] = calc_ate(wlscoef) 29 | self._dict['ate_se'] = calc_ate_se(cov) 30 | 31 | 32 | def calc_weights(pscore, D): 33 | 34 | N = pscore.shape[0] 35 | weights = np.empty(N) 36 | weights[D==0] = 1/(1-pscore[D==0]) 37 | weights[D==1] = 1/pscore[D==1] 38 | 39 | return weights 40 | 41 | 42 | def weigh_data(Y, D, X, weights): 43 | 44 | N, K = X.shape 45 | 46 | Y_w = weights * Y 47 | 48 | Z_w = np.empty((N,K+2)) 49 | Z_w[:,0] = weights 50 | Z_w[:,1] = weights * D 51 | Z_w[:,2:] = weights[:,None] * X 52 | 53 | return (Y_w, Z_w) 54 | 55 | -------------------------------------------------------------------------------- /causalinference/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tools import random_data, vignette_data, lalonde_data 2 | 3 | -------------------------------------------------------------------------------- /causalinference/utils/lalonde_data.txt: -------------------------------------------------------------------------------- 1 | re78 t black hisp age married nodegree educ re74 u74 re75 u75 2 | 9.93005 1 1 0 37 1 1 11 0 1 0 1 3 | 3.59589 1 0 1 22 0 1 9 0 1 0 1 4 | 24.9095 1 1 0 30 0 0 12 0 1 0 1 5 | 7.50615 1 1 0 27 0 1 11 0 1 0 1 6 | .28979 1 1 0 33 0 1 8 0 1 0 1 7 | 4.05649 1 1 0 22 0 1 9 0 1 0 1 8 | 0 1 1 0 23 0 0 12 0 1 0 1 9 | 8.47216 1 1 0 32 0 1 11 0 1 0 1 10 | 2.16402 1 1 0 22 0 0 16 0 1 0 1 11 | 12.4181 1 0 0 33 1 0 12 0 1 0 1 12 | 8.17391 1 1 0 19 0 1 9 0 1 0 1 13 | 17.0946 1 1 0 21 0 0 13 0 1 0 1 14 | 0 1 1 0 18 0 1 8 0 1 0 1 15 | 18.7399 1 1 0 27 1 1 10 0 1 0 1 16 | 3.02388 1 1 0 17 0 1 7 0 1 0 1 17 | 3.2285 1 1 0 19 0 1 10 0 1 0 1 18 | 14.5819 1 1 0 27 0 0 13 0 1 0 1 19 | 7.6934 1 1 0 23 0 1 10 0 1 0 1 20 | 10.8043 1 1 0 40 0 0 12 0 1 0 1 21 | 10.7474 1 1 0 26 0 0 12 0 1 0 1 22 | 0 1 1 0 23 0 1 11 0 1 0 1 23 | 5.1495 1 0 0 41 0 0 14 0 1 0 1 24 | 6.40895 1 0 0 38 0 1 9 0 1 0 1 25 | 1.9914 1 1 0 24 0 1 11 0 1 0 1 26 | 11.1632 1 1 0 18 0 1 10 0 1 0 1 27 | 9.643 1 1 0 29 1 1 11 0 1 0 1 28 | 9.89705 1 1 0 25 0 1 11 0 1 0 1 29 | 11.1429 1 0 1 27 0 1 10 0 1 0 1 30 | 16.218 1 1 0 17 0 1 10 0 1 0 1 31 | .9957 1 1 0 24 0 1 11 0 1 0 1 32 | 0 1 1 0 17 0 1 10 0 1 0 1 33 | 6.55159 1 1 0 48 0 1 4 0 1 0 1 34 | 1.57442 1 1 0 25 1 1 11 0 1 0 1 35 | 0 1 1 0 20 0 0 12 0 1 0 1 36 | 3.19175 1 1 0 25 0 0 12 0 1 0 1 37 | 20.5059 1 1 0 42 0 0 14 0 1 0 1 38 | 6.18188 1 1 0 25 0 1 5 0 1 0 1 39 | 5.91155 1 1 0 23 1 0 12 0 1 0 1 40 | 3.09416 1 1 0 46 1 1 8 0 1 0 1 41 | 0 1 1 0 24 0 1 10 0 1 0 1 42 | 1.25458 1 1 0 21 0 0 12 0 1 0 1 43 | 13.1888 1 0 0 19 0 1 9 0 1 0 1 44 | 8.06149 1 1 0 17 0 1 8 0 1 0 1 45 | 2.78796 1 0 1 18 1 1 8 0 1 0 1 46 | 3.97254 1 1 0 20 0 1 11 0 1 0 1 47 | 0 1 1 0 25 1 1 11 0 1 0 1 48 | 0 1 1 0 17 0 1 8 0 1 0 1 49 | 0 1 1 0 17 0 1 9 0 1 0 1 50 | 12.1874 1 1 0 25 0 1 5 0 1 0 1 51 | 4.84318 1 1 0 23 0 0 12 0 1 0 1 52 | 0 1 1 0 28 0 1 8 0 1 0 1 53 | 8.08749 1 1 0 31 1 1 11 0 1 0 1 54 | 0 1 1 0 18 0 1 11 0 1 0 1 55 | 2.34897 1 1 0 25 0 0 12 0 1 0 1 56 | .590782 1 1 0 30 1 1 11 0 1 0 1 57 | 0 1 1 0 17 0 1 10 0 1 0 1 58 | 1.06751 1 1 0 37 0 1 9 0 1 0 1 59 | 7.28499 1 1 0 41 1 1 4 0 1 0 1 60 | 13.1675 1 1 0 42 1 0 14 0 1 0 
1 61 | 1.04843 1 0 0 22 0 1 11 0 1 0 1 62 | 0 1 1 0 17 0 1 8 0 1 0 1 63 | 1.92394 1 1 0 29 0 1 8 0 1 0 1 64 | 4.66624 1 1 0 35 0 1 10 0 1 0 1 65 | .549298 1 1 0 27 0 1 11 0 1 0 1 66 | .762915 1 1 0 29 0 1 4 0 1 0 1 67 | 10.6943 1 1 0 28 0 1 9 0 1 0 1 68 | 0 1 1 0 27 0 1 11 0 1 0 1 69 | 0 1 0 0 23 0 1 7 0 1 0 1 70 | 8.54672 1 1 0 45 1 1 5 0 1 0 1 71 | 7.47966 1 1 0 29 0 0 13 0 1 0 1 72 | 0 1 1 0 27 0 1 9 0 1 0 1 73 | .647205 1 1 0 46 0 0 13 0 1 0 1 74 | 0 1 1 0 18 0 1 6 0 1 0 1 75 | 11.9658 1 1 0 25 0 0 12 0 1 0 1 76 | 9.59854 1 1 0 28 0 0 15 0 1 0 1 77 | 18.7834 1 0 0 25 0 1 11 0 1 0 1 78 | 18.6781 1 1 0 22 0 0 12 0 1 0 1 79 | 0 1 1 0 21 0 1 9 0 1 0 1 80 | 23.0056 1 1 0 40 0 1 11 0 1 0 1 81 | 6.4567 1 1 0 22 0 1 11 0 1 0 1 82 | 0 1 1 0 25 0 0 12 0 1 0 1 83 | 2.32111 1 1 0 18 0 0 12 0 1 0 1 84 | 4.94185 1 0 0 38 0 0 12 0 1 0 1 85 | 0 1 1 0 27 0 0 13 0 1 0 1 86 | 0 1 1 0 27 0 1 8 0 1 0 1 87 | 0 1 1 0 38 0 1 11 0 1 0 1 88 | 3.88128 1 0 1 23 0 1 8 0 1 0 1 89 | 17.231 1 1 0 26 0 1 11 0 1 0 1 90 | 8.0486 1 0 0 21 0 0 12 0 1 0 1 91 | 0 1 1 0 25 0 1 8 0 1 0 1 92 | 14.5099 1 1 0 31 1 1 11 0 1 0 1 93 | 0 1 1 0 17 0 1 10 0 1 0 1 94 | 0 1 1 0 25 0 1 11 0 1 0 1 95 | 9.98378 1 1 0 21 0 0 12 0 1 0 1 96 | 0 1 1 0 44 0 1 11 0 1 0 1 97 | 5.5875 1 0 0 25 0 0 12 0 1 0 1 98 | 4.48285 1 1 0 18 0 1 9 0 1 0 1 99 | 2.45615 1 1 0 42 0 0 12 0 1 0 1 100 | 0 1 1 0 25 0 1 10 0 1 0 1 101 | 26.8176 1 0 1 31 0 1 9 0 1 0 1 102 | 0 1 1 0 24 0 1 10 0 1 0 1 103 | 9.26579 1 1 0 26 0 1 10 0 1 0 1 104 | .48523 1 1 0 25 0 1 11 0 1 0 1 105 | 4.81463 1 1 0 18 0 1 11 0 1 0 1 106 | 7.45811 1 1 0 19 0 1 11 0 1 0 1 107 | 0 1 1 0 43 0 1 9 0 1 0 1 108 | 34.0993 1 1 0 27 0 0 13 0 1 0 1 109 | 1.95327 1 1 0 17 0 1 9 0 1 0 1 110 | 0 1 1 0 30 0 1 11 0 1 0 1 111 | 0 1 1 0 26 1 1 10 2.028 0 0 1 112 | 8.88167 1 1 0 20 0 1 9 6.08399 0 0 1 113 | 6.21067 1 0 1 17 0 1 9 .44517 0 .0743435 0 114 | 0 1 1 0 20 0 0 12 .989268 0 .165208 0 115 | .929884 1 1 0 18 0 1 11 .858254 0 .214564 0 116 | 0 1 1 0 27 1 0 12 3.67087 0 .334049 0 117 | 12.558 1 0 0 21 0 0 12 3.67087 0 .334049 0 118 | 22.1633 1 1 0 27 0 0 12 2.14341 0 .35795 0 119 | 1.65264 1 1 0 20 0 0 12 0 1 .377569 0 120 | 8.124721 1 1 0 19 0 1 10 0 1 .385274 0 121 | .6713319 1 1 0 23 0 0 12 5.50631 0 .501074 0 122 | 17.815 1 1 0 29 0 0 14 0 1 .679673 0 123 | 9.73715 1 1 0 18 0 1 10 0 1 .798908 0 124 | 17.6852 1 1 0 19 0 1 9 0 1 .798908 0 125 | 0 1 0 0 27 1 0 13 9.38157 0 .853723 0 126 | 4.32171 1 0 0 18 0 1 11 3.67823 0 .919558 0 127 | 1.77342 1 1 0 27 1 1 9 0 1 .934445 0 128 | 0 1 1 0 22 0 0 12 5.60585 0 .936177 0 129 | 11.2333 1 1 0 23 1 1 10 0 1 .936439 0 130 | .559443 1 0 1 23 0 0 12 9.38574 0 1.11744 0 131 | 1.08544 1 1 0 20 0 1 11 3.6375 0 1.22084 0 132 | 5.4452 1 1 0 17 0 1 9 1.71651 0 1.25344 0 133 | 60.3079 1 1 0 28 0 1 11 0 1 1.28408 0 134 | 1.46036 1 1 0 26 1 1 11 0 1 1.39285 0 135 | 6.94334 1 1 0 20 0 1 11 16.3186 0 1.48499 0 136 | 4.03271 1 1 0 24 1 1 11 .824389 0 1.66611 0 137 | 10.3633 1 1 0 31 0 1 9 0 1 1.69861 0 138 | 4.23231 1 0 0 23 1 1 8 0 1 1.71315 0 139 | 11.1414 1 1 0 18 0 1 10 2.14341 0 1.78427 0 140 | 0 1 1 0 29 0 0 12 10.8819 0 1.81728 0 141 | 13.3859 1 0 0 26 0 1 11 0 1 2.22627 0 142 | 4.84956 1 1 0 24 0 1 9 9.1547 0 2.28868 0 143 | 0 1 1 0 25 0 0 12 14.4268 0 2.40927 0 144 | 1.66051 1 1 0 24 0 1 10 4.2504 0 2.42195 0 145 | 0 1 1 0 46 0 1 8 3.16566 0 2.59472 0 146 | 2.48455 1 0 0 31 0 0 12 0 1 2.61122 0 147 | 4.1466 1 1 0 19 0 1 11 2.30503 0 2.61528 0 148 | 9.970679 1 1 0 19 0 1 8 0 1 2.65706 0 149 | 0 1 1 0 27 0 1 11 2.20694 0 2.66627 0 150 | 26.3723 1 1 0 26 1 1 11 0 1 
2.75465 0 151 | 5.61519 1 1 0 20 0 1 10 5.00573 0 2.77736 0 152 | 3.19657 1 1 0 28 0 1 10 0 1 2.83651 0 153 | 6.16768 1 1 0 24 0 0 12 13.7658 0 2.84276 0 154 | 7.53594 1 1 0 19 0 1 8 2.63635 0 2.93726 0 155 | 8.484241 1 1 0 23 0 0 12 6.26934 0 3.03996 0 156 | 1.29441 1 1 0 42 1 1 9 0 1 3.05853 0 157 | 0 1 1 0 25 0 0 13 12.3629 0 3.09073 0 158 | 5.01034 1 1 0 18 0 1 9 0 1 3.28738 0 159 | 9.37104 1 1 0 21 0 0 12 6.47368 0 3.33241 0 160 | 0 1 1 0 27 0 1 10 1.00115 0 3.55008 0 161 | 4.27961 1 1 0 21 0 1 8 .989268 0 3.6959 0 162 | 3.46256 1 1 0 22 0 1 9 2.19288 0 3.83699 0 163 | 7.38255 1 1 0 31 0 1 4 8.51759 0 4.02321 0 164 | 0 1 1 0 24 1 1 10 11.7032 0 4.07815 0 165 | 0 1 1 0 29 0 1 10 0 1 4.39895 0 166 | 10.9765 1 1 0 29 0 0 12 9.748389 0 4.87894 0 167 | 13.8296 1 0 0 19 0 1 10 0 1 5.32411 0 168 | 6.78846 1 0 1 19 1 1 11 5.42449 0 5.4638 0 169 | 9.5585 1 1 0 31 0 1 9 10.717 0 5.51784 0 170 | 13.2283 1 1 0 22 1 1 10 1.46835 0 5.58866 0 171 | .743667 1 1 0 21 0 1 9 6.41647 0 5.74933 0 172 | 5.52279 1 1 0 17 0 1 10 1.29147 0 5.79385 0 173 | 1.42494 1 1 0 26 1 0 12 8.40876 0 5.79483 0 174 | 1.35864 1 0 1 20 0 1 9 12.2608 0 5.87505 0 175 | 0 1 1 0 19 0 1 10 4.12195 0 6.05675 0 176 | .672877 1 1 0 26 0 1 10 25.9297 0 6.78896 0 177 | 0 1 1 0 28 0 1 11 1.92903 0 6.87186 0 178 | 10.0928 1 0 1 22 1 0 12 .492231 0 7.0557 0 179 | 6.28143 1 1 0 33 0 1 11 0 1 7.86792 0 180 | 12.5907 1 0 0 22 0 0 12 6.75999 0 8.4555 0 181 | 5.11201 1 0 1 29 0 1 10 0 1 8.85367 0 182 | 15.9526 1 1 0 33 1 0 12 20.28 0 10.9414 0 183 | 36.647 1 1 0 25 1 0 14 35.0401 0 11.5366 0 184 | 12.804 1 1 0 35 1 1 9 13.6024 0 13.8306 0 185 | 3.78663 1 1 0 35 1 1 8 13.7321 0 17.9762 0 186 | 4.18194 1 1 0 33 1 1 11 14.6607 0 25.1422 0 187 | 0 0 1 0 23 0 1 10 0 1 0 1 188 | 12.3837 0 0 0 26 0 0 12 0 1 0 1 189 | 0 0 1 0 22 0 1 9 0 1 0 1 190 | 10.7401 0 1 0 18 0 1 9 0 1 0 1 191 | 11.7965 0 1 0 45 0 1 11 0 1 0 1 192 | 9.22705 0 1 0 18 0 1 9 0 1 0 1 193 | 10.5693 0 0 0 24 0 1 8 0 1 0 1 194 | 6.04034 0 1 0 34 1 1 11 0 1 0 1 195 | 3.88083 0 0 1 24 0 1 4 0 1 0 1 196 | 0 0 1 0 36 0 1 10 0 1 0 1 197 | 5.77506 0 1 0 21 0 0 14 0 1 0 1 198 | 0 0 1 0 28 0 1 9 0 1 0 1 199 | 0 0 1 0 27 1 1 7 0 1 0 1 200 | 0 0 0 0 19 0 1 11 0 1 0 1 201 | 0 0 1 0 20 0 1 8 0 1 0 1 202 | 2.11372 0 1 0 34 0 0 12 0 1 0 1 203 | 7.61864 0 1 0 24 0 1 10 0 1 0 1 204 | 9.92095 0 0 1 22 0 1 8 0 1 0 1 205 | 4.19638 0 1 0 25 0 1 11 0 1 0 1 206 | 0 0 1 0 39 0 1 9 0 1 0 1 207 | 16.6583 0 1 0 19 1 1 9 0 1 0 1 208 | 9.722 0 1 0 44 0 1 9 0 1 0 1 209 | 3.78366 0 1 0 27 0 1 8 0 1 0 1 210 | 3.51593 0 1 0 25 0 1 8 0 1 0 1 211 | 17.0146 0 1 0 31 0 1 10 0 1 0 1 212 | 0 0 1 0 34 1 1 10 0 1 0 1 213 | 0 0 0 1 21 0 1 7 0 1 0 1 214 | 5.97026 0 1 0 33 0 0 12 0 1 0 1 215 | 1.85917 0 0 1 18 0 1 10 0 1 0 1 216 | 6.19194 0 1 0 26 1 0 12 0 1 0 1 217 | 7.28439 0 1 0 31 0 0 12 0 1 0 1 218 | .445831 0 1 0 35 0 1 10 0 1 0 1 219 | 0 0 1 0 20 0 0 12 0 1 0 1 220 | 0 0 1 0 25 0 1 11 0 1 0 1 221 | 7.36704 0 1 0 25 0 1 10 0 1 0 1 222 | 0 0 1 0 35 0 1 11 0 1 0 1 223 | 2.0155 0 1 0 20 0 1 10 0 1 0 1 224 | 15.7911 0 0 1 25 0 1 9 0 1 0 1 225 | 1.13547 0 1 0 27 0 1 10 0 1 0 1 226 | 6.37872 0 0 1 20 0 1 11 0 1 0 1 227 | 7.17619 0 1 0 26 0 1 11 0 1 0 1 228 | 0 0 1 0 38 0 1 8 0 1 0 1 229 | 7.95254 0 1 0 34 0 1 10 0 1 0 1 230 | 0 0 1 0 19 0 0 12 0 1 0 1 231 | 7.15213 0 1 0 32 0 1 8 0 1 0 1 232 | 8.329821 0 0 1 20 0 1 9 0 1 0 1 233 | 0 0 1 0 23 0 1 10 0 1 0 1 234 | 12.4299 0 1 0 38 0 1 10 0 1 0 1 235 | 0 0 1 0 24 0 1 11 0 1 0 1 236 | 5.08876 0 1 0 23 0 1 11 0 1 0 1 237 | 4.37404 0 0 1 20 0 1 7 0 1 0 1 238 | 1.55329 0 1 0 21 0 1 
11 0 1 0 1 239 | 0 0 1 0 25 0 1 10 0 1 0 1 240 | 1.6983 0 1 0 22 1 1 11 0 1 0 1 241 | 0 0 1 0 23 0 1 11 0 1 0 1 242 | 11.2946 0 0 0 24 0 0 12 0 1 0 1 243 | 0 0 1 0 29 0 1 11 0 1 0 1 244 | 14.6264 0 1 0 24 0 1 11 0 1 0 1 245 | 12.8984 0 1 0 22 0 1 9 0 1 0 1 246 | 5.76713 0 1 0 28 0 1 11 0 1 0 1 247 | 6.52792 0 0 1 18 0 1 10 0 1 0 1 248 | 3.93124 0 1 0 26 0 1 10 0 1 0 1 249 | 20.9422 0 1 0 25 0 1 10 0 1 0 1 250 | 0 0 1 0 24 0 1 10 0 1 0 1 251 | 0 0 1 0 26 0 1 5 0 1 0 1 252 | 14.6904 0 1 0 36 0 1 10 0 1 0 1 253 | 0 0 1 0 22 0 1 11 0 1 0 1 254 | 3.4181 0 1 0 25 0 0 12 0 1 0 1 255 | 11.1973 0 1 0 27 0 1 11 0 1 0 1 256 | 0 0 1 0 29 0 1 8 0 1 0 1 257 | 0 0 1 0 24 0 0 12 0 1 0 1 258 | 0 0 1 0 22 0 1 10 0 1 0 1 259 | 1.45569 0 1 0 24 0 1 7 0 1 0 1 260 | 1.89094 0 1 0 29 0 0 12 0 1 0 1 261 | 4.48562 0 0 1 25 1 1 11 0 1 0 1 262 | 13.6134 0 1 0 30 0 0 12 0 1 0 1 263 | 1.39051 0 1 0 22 0 1 8 0 1 0 1 264 | 5.8438 0 1 0 55 0 1 3 0 1 0 1 265 | 8.598519 0 1 0 20 0 1 10 0 1 0 1 266 | 2.9202 0 1 0 34 0 1 11 0 1 0 1 267 | 0 0 1 0 22 0 0 12 0 1 0 1 268 | 6.73532 0 0 1 32 1 0 12 0 1 0 1 269 | 0 0 1 0 31 0 1 10 0 1 0 1 270 | 0 0 1 0 18 0 1 9 0 1 0 1 271 | 0 0 0 1 50 0 1 10 0 1 0 1 272 | .0447555 0 1 0 25 1 1 11 0 1 0 1 273 | 0 0 1 0 23 1 1 10 0 1 0 1 274 | 0 0 1 0 38 0 1 10 0 1 0 1 275 | 3.70181 0 1 0 25 1 1 10 0 1 0 1 276 | 6.93034 0 1 0 42 0 1 10 0 1 0 1 277 | 3.7958 0 1 0 39 1 0 12 0 1 0 1 278 | 5.19325 0 1 0 34 1 0 13 0 1 0 1 279 | 2.19353 0 1 0 24 0 1 7 0 1 0 1 280 | 11.1205 0 1 0 32 0 1 11 0 1 0 1 281 | 7.60952 0 1 0 27 0 0 13 0 1 0 1 282 | 2.16903 0 1 0 26 0 1 10 0 1 0 1 283 | 0 0 1 0 44 0 1 11 0 1 0 1 284 | 1.26423 0 1 0 25 0 1 11 0 1 0 1 285 | 0 0 1 0 25 0 0 12 0 1 0 1 286 | 0 0 1 0 28 1 0 12 0 1 0 1 287 | 0 0 1 0 32 0 1 10 0 1 0 1 288 | 0 0 1 0 22 0 1 10 0 1 0 1 289 | 5.71264 0 1 0 19 0 1 9 0 1 0 1 290 | 0 0 1 0 31 1 1 10 0 1 0 1 291 | 0 0 1 0 23 0 1 11 0 1 0 1 292 | 0 0 1 0 33 0 1 11 0 1 0 1 293 | 1.18488 0 1 0 27 0 1 10 0 1 0 1 294 | 10.2259 0 1 0 29 1 1 11 0 1 0 1 295 | 0 0 1 0 23 0 1 10 0 1 0 1 296 | 4.71537 0 1 0 25 1 1 9 0 1 0 1 297 | .28979 0 1 0 25 0 1 10 0 1 0 1 298 | 0 0 1 0 24 0 1 10 0 1 0 1 299 | 8.19042 0 0 0 28 0 1 8 0 1 0 1 300 | 4.81305 0 1 0 26 0 1 6 0 1 0 1 301 | 7.34468 0 1 0 30 1 0 14 0 1 0 1 302 | 0 0 1 0 25 1 1 10 0 1 0 1 303 | 0 0 1 0 29 1 1 11 0 1 0 1 304 | 0 0 1 0 25 1 0 12 0 1 0 1 305 | 0 0 1 0 28 0 0 13 0 1 0 1 306 | 4.35091 0 1 0 23 0 1 11 0 1 0 1 307 | 7.81252 0 1 0 54 0 1 11 0 1 0 1 308 | 0 0 0 1 33 0 1 5 0 1 0 1 309 | 3.64466 0 1 0 20 0 1 8 0 1 0 1 310 | 4.8448 0 1 0 45 0 1 9 0 1 0 1 311 | 0 0 1 0 39 0 1 6 0 1 0 1 312 | 0 0 1 0 26 0 0 12 0 1 0 1 313 | 0 0 1 0 23 0 1 10 0 1 0 1 314 | 14.7929 0 0 0 27 0 0 12 0 1 0 1 315 | 0 0 0 1 33 1 1 9 0 1 0 1 316 | 0 0 1 0 25 1 1 10 0 1 0 1 317 | 3.7467 0 1 0 23 0 1 8 0 1 0 1 318 | 1.56815 0 1 0 18 0 1 8 0 1 0 1 319 | 7.01044 0 1 0 17 0 1 8 0 1 0 1 320 | 3.81168 0 0 1 19 0 1 9 0 1 0 1 321 | 10.7986 0 0 1 18 0 1 8 0 1 0 1 322 | 4.65727 0 1 0 18 0 1 11 0 1 0 1 323 | 8.55153 0 1 0 17 0 1 11 0 1 0 1 324 | 4.30988 0 1 0 19 0 1 10 0 1 0 1 325 | 5.2864 0 1 0 19 0 1 10 0 1 0 1 326 | 12.4862 0 1 0 18 0 1 9 0 1 0 1 327 | 10.8774 0 1 0 18 0 1 9 0 1 0 1 328 | .202285 0 1 0 18 0 1 10 0 1 0 1 329 | 2.65771 0 1 0 17 0 1 10 0 1 0 1 330 | 4.13258 0 1 0 18 0 1 7 0 1 0 1 331 | 11.3031 0 1 0 18 0 1 11 0 1 0 1 332 | 0 0 0 1 19 0 1 10 0 1 0 1 333 | 0 0 1 0 18 0 1 9 0 1 0 1 334 | 0 0 1 0 17 0 1 10 0 1 0 1 335 | 2.18943 0 1 0 17 0 1 10 0 1 0 1 336 | 0 0 1 0 19 0 1 11 0 1 0 1 337 | 10.211 0 0 0 17 0 1 8 0 1 0 1 338 | 11.0481 0 1 0 18 0 1 10 0 1 0 1 339 | 0 0 1 
0 18 0 1 9 0 1 0 1 340 | 8.99387 0 1 0 17 0 1 8 0 1 0 1 341 | 5.0718 0 0 1 19 0 1 6 0 1 0 1 342 | 3.19401 0 0 0 19 0 1 10 0 1 0 1 343 | 0 0 0 0 17 0 1 11 0 1 0 1 344 | 5.19309 0 1 0 20 0 1 9 0 1 0 1 345 | 0 0 1 0 17 0 1 9 0 1 0 1 346 | .275566 0 1 0 17 0 1 10 0 1 0 1 347 | 3.5907 0 1 0 17 0 1 9 0 1 0 1 348 | 0 0 1 0 19 0 1 11 0 1 0 1 349 | 12.7977 0 1 0 19 1 1 10 0 1 0 1 350 | 2.03591 0 1 0 20 0 1 9 0 1 0 1 351 | 2.38968 0 1 0 18 0 1 9 0 1 0 1 352 | 0 0 1 0 18 0 1 11 0 1 0 1 353 | 8.46928 0 0 1 17 0 1 10 0 1 0 1 354 | 0 0 1 0 19 0 1 11 0 1 0 1 355 | 1.14339 0 1 0 17 0 1 10 0 1 0 1 356 | 5.11481 0 0 1 17 0 1 9 0 1 0 1 357 | .781224 0 1 0 18 0 1 10 0 1 0 1 358 | 3.34322 0 1 0 21 0 1 9 .591499 0 0 1 359 | 9.602441 0 1 0 18 0 1 10 1.56325 0 0 1 360 | 0 0 1 0 19 0 1 11 1.62662 0 0 1 361 | 16.4616 0 1 0 24 0 1 9 2.7885 0 0 1 362 | 6.77162 0 0 1 28 1 1 11 3.47295 0 0 1 363 | 0 0 1 0 25 0 1 11 5.28125 0 0 1 364 | 11.0116 0 1 0 21 0 1 7 33.8 0 0 1 365 | 0 0 1 0 39 0 1 11 0 1 .0836896 0 366 | 0 0 1 0 36 0 0 12 0 1 .142397 0 367 | 0 0 1 0 24 0 0 12 0 1 .159885 0 368 | 4.25113 0 1 0 17 0 1 11 .989268 0 .165208 0 369 | 2.89167 0 1 0 18 0 1 10 .960427 0 .240107 0 370 | 5.51437 0 1 0 18 0 1 10 0 1 .273553 0 371 | 4.8589 0 1 0 28 0 1 10 1.47129 0 .367823 0 372 | 4.81258 0 0 0 27 0 0 13 5.21431 0 .474502 0 373 | 0 0 0 0 31 0 0 12 0 1 .494643 0 374 | .604199 0 1 0 22 0 1 9 0 1 .506408 0 375 | 14.5279 0 1 0 31 0 1 10 0 1 .520446 0 376 | 0 0 1 0 26 1 1 10 6.14037 0 .558773 0 377 | 7.3005 0 0 0 18 0 1 9 0 1 .559596 0 378 | 0 0 1 0 23 0 1 11 6.38231 0 .58079 0 379 | 4.15992 0 1 0 20 0 0 12 0 1 .591815 0 380 | 0 0 1 0 19 0 1 10 0 1 .604154 0 381 | 5.49759 0 1 0 18 0 1 11 1.0647 0 .645272 0 382 | 0 0 1 0 17 0 1 10 0 1 .664569 0 383 | 0 0 1 0 27 1 0 12 0 1 .75239 0 384 | 0 0 1 0 27 0 1 11 3.06519 0 .766299 0 385 | 16.477 0 0 0 28 0 0 12 0 1 .803343 0 386 | 0 0 1 0 28 0 1 11 2.43195 0 .86348 0 387 | 39.4835 0 1 0 21 0 1 10 6.66106 0 1.16236 0 388 | 11.3063 0 0 1 17 0 1 10 4.90512 0 1.1689 0 389 | 6.67202 0 1 0 26 0 1 11 4.69996 0 1.17499 0 390 | 9.378651 0 1 0 29 0 1 9 0 1 1.20382 0 391 | 5.08899 0 0 1 17 0 1 10 1.20361 0 1.23963 0 392 | 2.63929 0 1 0 22 0 1 11 7.91413 0 1.32166 0 393 | 9.4959 0 1 0 24 0 1 11 0 1 1.32799 0 394 | 20.8931 0 1 0 20 0 0 12 .557699 0 1.37147 0 395 | 0 0 1 0 18 0 0 12 0 1 1.40551 0 396 | 10.3617 0 0 0 24 0 1 11 2.66973 0 1.46838 0 397 | 1.7402 0 0 0 21 0 1 9 2.98841 0 1.57717 0 398 | 0 0 1 0 30 0 1 8 0 1 1.70666 0 399 | 0 0 1 0 31 0 1 11 17.7119 0 1.72645 0 400 | 6.35419 0 0 1 17 0 1 10 1.44268 0 1.73456 0 401 | 7.171 0 1 0 19 0 1 9 8.40963 0 1.77809 0 402 | 5.57355 0 1 0 23 0 1 11 0 1 1.89602 0 403 | .439688 0 1 0 22 0 0 12 4.38002 0 2.00368 0 404 | 16.97 0 1 0 29 0 0 12 22.8594 0 2.08021 0 405 | 5.34402 0 1 0 22 0 1 10 0 1 2.17496 0 406 | 2.72532 0 1 0 29 0 0 13 .718249 0 2.26558 0 407 | 9.772281 0 1 0 19 0 1 11 .721341 0 2.44559 0 408 | 0 0 1 0 17 0 1 9 0 1 2.59527 0 409 | 0 0 1 0 18 0 1 10 1.71651 0 2.68213 0 410 | 1.72091 0 1 0 19 0 0 12 8.417 0 2.8142 0 411 | 0 0 1 0 20 0 1 6 6.00688 0 2.85061 0 412 | 18.8599 0 1 0 33 0 1 11 10.5238 0 2.89982 0 413 | 1.32454 0 0 1 36 0 1 11 5.44373 0 3.06388 0 414 | .284658 0 1 0 25 0 1 11 15.21 0 3.07273 0 415 | 11.1959 0 1 0 19 0 1 11 3.50401 0 3.28568 0 416 | 0 0 1 0 23 0 1 8 7.72428 0 3.40306 0 417 | 0 0 1 0 17 0 1 11 4.08073 0 3.79603 0 418 | 7.56527 0 1 0 43 0 1 10 2.50287 0 4.12844 0 419 | 0 0 1 0 26 1 1 11 0 1 4.18473 0 420 | 0 0 1 0 27 1 1 11 0 1 4.49188 0 421 | 0 0 1 0 19 0 1 11 6.33749 0 4.50306 0 422 | 4.97459 0 1 0 28 1 1 11 8.593161 0 
5.3939 0 423 | 12.78 0 1 0 28 1 0 12 10.5851 0 5.55146 0 424 | 3.52358 0 1 0 26 0 1 8 1.12629 0 5.5626 0 425 | 0 0 1 0 31 0 0 12 0 1 5.61391 0 426 | 10.2748 0 1 0 23 0 1 11 7.61736 0 5.71641 0 427 | 4.77972 0 1 0 20 0 0 12 7.18249 0 6.00473 0 428 | 16.9882 0 1 0 28 1 1 10 8.293349 0 6.44948 0 429 | .499257 0 1 0 39 1 0 12 19.7853 0 6.60814 0 430 | 3.08358 0 1 0 21 0 1 8 39.5707 0 6.6083 0 431 | 3.70872 0 1 0 22 0 1 11 8.81007 0 6.97448 0 432 | 7.65922 0 1 0 20 0 1 11 8.00916 0 7.66688 0 433 | 20.8578 0 0 1 21 0 1 11 2.99253 0 8.920469 0 434 | 7.07818 0 0 1 23 0 0 12 5.7217 0 8.96068 0 435 | 0 0 1 0 29 0 1 9 9.26894 0 9.16069 0 436 | 1.23984 0 1 0 28 1 1 9 10.2224 0 9.21045 0 437 | 3.9828 0 1 0 30 1 1 11 0 1 9.31194 0 438 | 0 0 1 0 25 1 1 10 13.52 0 9.319441 0 439 | 0 0 1 0 28 1 1 11 .824389 0 10.0339 0 440 | 7.09492 0 0 0 22 0 1 10 27.8644 0 10.5987 0 441 | 12.3593 0 1 0 44 1 1 9 12.2608 0 10.8572 0 442 | 0 0 1 0 21 0 1 9 31.8864 0 12.3572 0 443 | 0 0 1 0 28 0 1 11 17.4915 0 13.3713 0 444 | 16.9003 0 0 1 29 0 1 9 9.59431 0 16.3412 0 445 | 7.34396 0 1 0 25 1 1 9 24.7316 0 16.9466 0 446 | 5.4488 0 0 0 22 1 1 10 25.7209 0 23.032 0 447 | -------------------------------------------------------------------------------- /causalinference/utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import norm, logistic 3 | 4 | from os import path 5 | lalonde_file = path.join(path.dirname(__file__), 'lalonde_data.txt') 6 | vignette_file = path.join(path.dirname(__file__), 'vignette_data.txt') 7 | 8 | 9 | def convert_to_formatting(entry_types): 10 | 11 | for entry_type in entry_types: 12 | if entry_type == 'string': 13 | yield 's' 14 | elif entry_type == 'float': 15 | yield '.3f' 16 | elif entry_type == 'integer': 17 | yield '.0f' 18 | 19 | 20 | def add_row(entries, entry_types, col_spans, width): 21 | 22 | #Convert an array of string or numeric entries into a string with 23 | #even formatting and spacing. 24 | 25 | vis_cols = len(col_spans) 26 | invis_cols = sum(col_spans) 27 | 28 | char_per_col = width // invis_cols 29 | first_col_padding = width % invis_cols 30 | 31 | char_spans = [char_per_col * col_span for col_span in col_spans] 32 | char_spans[0] += first_col_padding 33 | formatting = convert_to_formatting(entry_types) 34 | line = ['%'+str(s)+f for (s,f) in zip(char_spans,formatting)] 35 | 36 | return (''.join(line) % tuple(entries)) + '\n' 37 | 38 | 39 | def add_line(width): 40 | 41 | return '-'*width + '\n' 42 | 43 | 44 | def gen_reg_entries(varname, coef, se): 45 | 46 | z = coef / se 47 | p = 2*(1 - norm.cdf(np.abs(z))) 48 | lw = coef - 1.96*se 49 | up = coef + 1.96*se 50 | 51 | return (varname, coef, se, z, p, lw, up) 52 | 53 | 54 | def random_data(N=5000, K=3, unobservables=False, **kwargs): 55 | 56 | """ 57 | Function that generates data according to one of two simple models that 58 | satisfies the unconfoundedness assumption. 59 | 60 | The covariates and error terms are generated according to 61 | X ~ N(mu, Sigma), epsilon ~ N(0, Gamma). 62 | 63 | The counterfactual outcomes are generated by 64 | Y0 = X*beta + epsilon_0, 65 | Y1 = delta + X*(beta+theta) + epsilon_1. 66 | 67 | Selection is done according to the following propensity score function: 68 | P(D=1|X) = Lambda(X*beta). 69 | 70 | Here Lambda is the standard logistic CDF. 71 | 72 | Parameters 73 | ---------- 74 | N: int 75 | Number of units to draw. Defaults to 5000. 76 | K: int 77 | Number of covariates. Defaults to 3. 
78 | unobservables: bool 79 | Returns potential outcomes and true propensity score 80 | in addition to observed outcome and covariates if True. 81 | Defaults to False. 82 | mu, Sigma, Gamma, beta, delta, theta: NumPy ndarrays, optional 83 | Parameter values appearing in data generating process. 84 | 85 | Returns 86 | ------- 87 | tuple 88 | A tuple in the form of (Y, D, X) or (Y, D, X, Y0, Y1, pscore) of 89 | observed outcomes, treatment indicators, covariate matrix, 90 | potential outcomes, and true propensity scores. 91 | """ 92 | 93 | mu = kwargs.get('mu', np.zeros(K)) 94 | beta = kwargs.get('beta', np.ones(K)) 95 | theta = kwargs.get('theta', np.ones(K)) 96 | delta = kwargs.get('delta', 3) 97 | Sigma = kwargs.get('Sigma', np.identity(K)) 98 | Gamma = kwargs.get('Gamma', np.identity(2)) 99 | 100 | X = np.random.multivariate_normal(mean=mu, cov=Sigma, size=N) 101 | Xbeta = X.dot(beta) 102 | pscore = logistic.cdf(Xbeta) 103 | D = np.array([np.random.binomial(1, p, size=1) for p in pscore]).flatten() 104 | 105 | epsilon = np.random.multivariate_normal(mean=np.zeros(2), cov=Gamma, size=N) 106 | Y0 = Xbeta + epsilon[:,0] 107 | Y1 = delta + X.dot(beta+theta) + epsilon[:,1] 108 | Y = (1-D)*Y0 + D*Y1 109 | 110 | if unobservables: 111 | return Y, D, X, Y0, Y1, pscore 112 | else: 113 | return Y, D, X 114 | 115 | 116 | def read_tsv(filepath): 117 | 118 | data = np.loadtxt(filepath, delimiter='\t', skiprows=1) 119 | Y = data[:,0] 120 | D = data[:,1] 121 | X = data[:,2:] 122 | 123 | return Y, D, X 124 | 125 | 126 | def vignette_data(): 127 | 128 | return read_tsv(vignette_file) 129 | 130 | 131 | def lalonde_data(): 132 | 133 | return read_tsv(lalonde_file) 134 | 135 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp." 92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Causalinference.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Causalinference.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. 
You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Causalinference" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Causalinference" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 
201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% block footer %} 4 | {{ super() }} 5 | 9 | 14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /docs/causalinference.core.rst: -------------------------------------------------------------------------------- 1 | causalinference.core package 2 | ============================ 3 | 4 | causalinference.core.data module 5 | -------------------------------- 6 | 7 | .. automodule:: causalinference.core.data 8 | :members: 9 | :show-inheritance: 10 | 11 | causalinference.core.propensity module 12 | -------------------------------------- 13 | 14 | .. automodule:: causalinference.core.propensity 15 | :members: 16 | :show-inheritance: 17 | 18 | causalinference.core.strata module 19 | ---------------------------------- 20 | 21 | .. automodule:: causalinference.core.strata 22 | :members: 23 | :show-inheritance: 24 | 25 | causalinference.core.summary module 26 | ----------------------------------- 27 | 28 | .. automodule:: causalinference.core.summary 29 | :members: 30 | :show-inheritance: 31 | 32 | -------------------------------------------------------------------------------- /docs/causalinference.estimators.rst: -------------------------------------------------------------------------------- 1 | causalinference.estimators package 2 | ================================== 3 | 4 | causalinference.estimators.base module 5 | -------------------------------------- 6 | 7 | .. automodule:: causalinference.estimators.base 8 | :members: 9 | :show-inheritance: 10 | 11 | causalinference.estimators.blocking module 12 | ------------------------------------------ 13 | 14 | .. automodule:: causalinference.estimators.blocking 15 | :members: 16 | :show-inheritance: 17 | 18 | causalinference.estimators.matching module 19 | ------------------------------------------ 20 | 21 | .. automodule:: causalinference.estimators.matching 22 | :members: 23 | :show-inheritance: 24 | 25 | causalinference.estimators.ols module 26 | ------------------------------------- 27 | 28 | .. automodule:: causalinference.estimators.ols 29 | :members: 30 | :show-inheritance: 31 | 32 | causalinference.estimators.weighting module 33 | ------------------------------------------- 34 | 35 | .. 
automodule:: causalinference.estimators.weighting 36 | :members: 37 | :show-inheritance: 38 | 39 | -------------------------------------------------------------------------------- /docs/causalinference.rst: -------------------------------------------------------------------------------- 1 | causalinference package 2 | ======================= 3 | 4 | This package contains the ``CausalModel`` class, the main interface for assessing the tools of *Causalinference*. 5 | 6 | CausalModel 7 | ----------- 8 | 9 | .. automodule:: causalinference.causal 10 | :members: 11 | :show-inheritance: 12 | 13 | Subpackages 14 | ----------- 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | 19 | causalinference.core 20 | causalinference.estimators 21 | causalinference.utils 22 | 23 | -------------------------------------------------------------------------------- /docs/causalinference.utils.rst: -------------------------------------------------------------------------------- 1 | causalinference.utils package 2 | ============================= 3 | 4 | causalinference.utils.tools module 5 | ---------------------------------- 6 | 7 | .. automodule:: causalinference.utils.tools 8 | :members: 9 | :show-inheritance: 10 | 11 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Causalinference documentation build configuration file, created by 4 | # sphinx-quickstart on Sat May 21 18:45:25 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | sys.path.insert(0, os.path.abspath('../')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.autosummary', 34 | 'numpydoc', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix(es) of source filenames. 41 | # You can specify multiple suffix as a list of string: 42 | # source_suffix = ['.rst', '.md'] 43 | source_suffix = '.rst' 44 | 45 | # The encoding of source files. 46 | #source_encoding = 'utf-8-sig' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = u'Causalinference' 53 | copyright = u'2016, Laurence Wong' 54 | author = u'Laurence Wong' 55 | 56 | # The version info for the project you're documenting, acts as replacement for 57 | # |version| and |release|, also used in various other places throughout the 58 | # built documents. 59 | # 60 | # The short X.Y version. 
61 | version = u'0.1.3' 62 | # The full version, including alpha/beta/rc tags. 63 | release = u'0.1.3' 64 | 65 | # The language for content autogenerated by Sphinx. Refer to documentation 66 | # for a list of supported languages. 67 | # 68 | # This is also used if you do content translation via gettext catalogs. 69 | # Usually you set "language" from the command line for these cases. 70 | language = None 71 | 72 | # There are two options for replacing |today|: either, you set today to some 73 | # non-false value, then it is used: 74 | #today = '' 75 | # Else, today_fmt is used as the format for a strftime call. 76 | #today_fmt = '%B %d, %Y' 77 | 78 | # List of patterns, relative to source directory, that match files and 79 | # directories to ignore when looking for source files. 80 | # This patterns also effect to html_static_path and html_extra_path 81 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 82 | 83 | # The reST default role (used for this markup: `text`) to use for all 84 | # documents. 85 | #default_role = None 86 | 87 | # If true, '()' will be appended to :func: etc. cross-reference text. 88 | #add_function_parentheses = True 89 | 90 | # If true, the current module name will be prepended to all description 91 | # unit titles (such as .. function::). 92 | #add_module_names = True 93 | 94 | # If true, sectionauthor and moduleauthor directives will be shown in the 95 | # output. They are ignored by default. 96 | #show_authors = False 97 | 98 | # The name of the Pygments (syntax highlighting) style to use. 99 | pygments_style = 'sphinx' 100 | 101 | # A list of ignored prefixes for module index sorting. 102 | #modindex_common_prefix = [] 103 | 104 | # If true, keep warnings as "system message" paragraphs in the built documents. 105 | #keep_warnings = False 106 | 107 | # If true, `todo` and `todoList` produce output, else they produce nothing. 108 | todo_include_todos = False 109 | 110 | 111 | # -- Options for HTML output ---------------------------------------------- 112 | 113 | # The theme to use for HTML and HTML Help pages. See the documentation for 114 | # a list of builtin themes. 115 | html_theme = 'classic' 116 | 117 | # Theme options are theme-specific and customize the look and feel of a theme 118 | # further. For a list of options available for each theme, see the 119 | # documentation. 120 | #html_theme_options = {} 121 | 122 | # Add any paths that contain custom themes here, relative to this directory. 123 | #html_theme_path = [] 124 | 125 | # The name for this set of Sphinx documents. 126 | # " v documentation" by default. 127 | #html_title = u'Causalinference v0.1.3' 128 | 129 | # A shorter title for the navigation bar. Default is the same as html_title. 130 | #html_short_title = None 131 | 132 | # The name of an image file (relative to this directory) to place at the top 133 | # of the sidebar. 134 | #html_logo = None 135 | 136 | # The name of an image file (relative to this directory) to use as a favicon of 137 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 138 | # pixels large. 139 | html_favicon = 'favicon.png' 140 | 141 | # Add any paths that contain custom static files (such as style sheets) here, 142 | # relative to this directory. They are copied after the builtin static files, 143 | # so a file named "default.css" will overwrite the builtin "default.css". 
144 | html_static_path = ['_static'] 145 | 146 | # Add any extra paths that contain custom files (such as robots.txt or 147 | # .htaccess) here, relative to this directory. These files are copied 148 | # directly to the root of the documentation. 149 | #html_extra_path = [] 150 | 151 | # If not None, a 'Last updated on:' timestamp is inserted at every page 152 | # bottom, using the given strftime format. 153 | # The empty string is equivalent to '%b %d, %Y'. 154 | #html_last_updated_fmt = None 155 | 156 | # If true, SmartyPants will be used to convert quotes and dashes to 157 | # typographically correct entities. 158 | #html_use_smartypants = True 159 | 160 | # Custom sidebar templates, maps document names to template names. 161 | #html_sidebars = {} 162 | 163 | # Additional templates that should be rendered to pages, maps page names to 164 | # template names. 165 | #html_additional_pages = {} 166 | 167 | # If false, no module index is generated. 168 | html_domain_indices = False 169 | 170 | # If false, no index is generated. 171 | #html_use_index = True 172 | 173 | # If true, the index is split into individual pages for each letter. 174 | #html_split_index = False 175 | 176 | # If true, links to the reST sources are added to the pages. 177 | html_show_sourcelink = False 178 | 179 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 180 | html_show_sphinx = False 181 | 182 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 183 | #html_show_copyright = True 184 | 185 | # If true, an OpenSearch description file will be output, and all pages will 186 | # contain a tag referring to it. The value of this option must be the 187 | # base URL from which the finished HTML is served. 188 | #html_use_opensearch = '' 189 | 190 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 191 | #html_file_suffix = None 192 | 193 | # Language to be used for generating the HTML full-text search index. 194 | # Sphinx supports the following languages: 195 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 196 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 197 | #html_search_language = 'en' 198 | 199 | # A dictionary with options for the search language support, empty by default. 200 | # 'ja' uses this config value. 201 | # 'zh' user can custom change `jieba` dictionary path. 202 | #html_search_options = {'type': 'default'} 203 | 204 | # The name of a javascript file (relative to the configuration directory) that 205 | # implements a search results scorer. If empty, the default will be used. 206 | #html_search_scorer = 'scorer.js' 207 | 208 | # Output file base name for HTML help builder. 209 | htmlhelp_basename = 'Causalinferencedoc' 210 | 211 | # -- Options for LaTeX output --------------------------------------------- 212 | 213 | latex_elements = { 214 | # The paper size ('letterpaper' or 'a4paper'). 215 | #'papersize': 'letterpaper', 216 | 217 | # The font size ('10pt', '11pt' or '12pt'). 218 | #'pointsize': '10pt', 219 | 220 | # Additional stuff for the LaTeX preamble. 221 | #'preamble': '', 222 | 223 | # Latex figure (float) alignment 224 | #'figure_align': 'htbp', 225 | } 226 | 227 | # Grouping the document tree into LaTeX files. List of tuples 228 | # (source start file, target name, title, 229 | # author, documentclass [howto, manual, or own class]). 
230 | latex_documents = [ 231 | (master_doc, 'Causalinference.tex', u'Causalinference Documentation', 232 | u'Laurence Wong', 'manual'), 233 | ] 234 | 235 | # The name of an image file (relative to this directory) to place at the top of 236 | # the title page. 237 | #latex_logo = None 238 | 239 | # For "manual" documents, if this is true, then toplevel headings are parts, 240 | # not chapters. 241 | #latex_use_parts = False 242 | 243 | # If true, show page references after internal links. 244 | #latex_show_pagerefs = False 245 | 246 | # If true, show URL addresses after external links. 247 | #latex_show_urls = False 248 | 249 | # Documents to append as an appendix to all manuals. 250 | #latex_appendices = [] 251 | 252 | # If false, no module index is generated. 253 | #latex_domain_indices = True 254 | 255 | 256 | # -- Options for manual page output --------------------------------------- 257 | 258 | # One entry per manual page. List of tuples 259 | # (source start file, name, description, authors, manual section). 260 | man_pages = [ 261 | (master_doc, 'causalinference', u'Causalinference Documentation', 262 | [author], 1) 263 | ] 264 | 265 | # If true, show URL addresses after external links. 266 | #man_show_urls = False 267 | 268 | 269 | # -- Options for Texinfo output ------------------------------------------- 270 | 271 | # Grouping the document tree into Texinfo files. List of tuples 272 | # (source start file, target name, title, author, 273 | # dir menu entry, description, category) 274 | texinfo_documents = [ 275 | (master_doc, 'Causalinference', u'Causalinference Documentation', 276 | author, 'Causalinference', 'One line description of project.', 277 | 'Miscellaneous'), 278 | ] 279 | 280 | # Documents to append as an appendix to all manuals. 281 | #texinfo_appendices = [] 282 | 283 | # If false, no module index is generated. 284 | #texinfo_domain_indices = True 285 | 286 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 287 | #texinfo_show_urls = 'footnote' 288 | 289 | # If true, do not generate a @detailmenu in the "Top" node's menu. 290 | #texinfo_no_detailmenu = False 291 | 292 | # -- Custom configuration ------------------------------------------------ 293 | 294 | autodoc_member_order = 'bysource' 295 | numpydoc_show_class_members = False 296 | -------------------------------------------------------------------------------- /docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laurencium/Causalinference/630e8fb195754a720da41791b725d3dadabfb257/docs/favicon.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Causalinference documentation master file, created by 2 | sphinx-quickstart on Fri May 20 18:53:32 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Causal Inference in Python 7 | ========================== 8 | 9 | *Causal Inference in Python*, or *Causalinference* in short, is a software package that implements various statistical and econometric methods used in the field variously known as Causal Inference, Program Evaluation, or Treatment Effect Analysis. 10 | 11 | Work on *Causalinference* started in 2014 by Laurence Wong as a personal side project. It is distributed under the 3-Clause BSD license. 
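As a preview of the workflow covered in the rest of this guide, a typical analysis session might look like the following sketch. It is illustrative only: the method names (``est_propensity_s``, ``trim_s``, ``stratify_s``, ``est_via_matching``) and the ``estimates`` attribute follow the package's vignette and are assumptions here rather than guarantees, so consult ``help(causal)`` for the authoritative listing: ::

    >>> from causalinference import CausalModel
    >>> from causalinference.utils import lalonde_data
    >>> Y, D, X = lalonde_data()       # outcome, treatment indicator, covariate matrix
    >>> causal = CausalModel(Y, D, X)
    >>> causal.est_propensity_s()      # estimate the propensity score
    >>> causal.trim_s()                # drop units with extreme propensity scores
    >>> causal.stratify_s()            # subclassify the sample on the propensity score
    >>> causal.est_via_matching()      # estimate treatment effects by matching
    >>> print(causal.estimates)        # ATE, ATT, and ATC with standard errors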
12 | 13 | Important Links 14 | --------------- 15 | 16 | The official website for *Causalinference* is 17 | 18 | https://causalinferenceinpython.org 19 | 20 | The most current development version is hosted on GitHub at 21 | 22 | https://github.com/laurencium/causalinference 23 | 24 | Package source and binary distribution files are available from PyPi at 25 | 26 | https://pypi.python.org/pypi/causalinference 27 | 28 | For an overview of the main features and uses of *Causalinference*, please refer to 29 | 30 | https://github.com/laurencium/causalinference/blob/master/docs/tex/vignette.pdf 31 | 32 | A blog dedicated to providing a more detailed walkthrough of *Causalinference* and the econometric theory behind it can be found at 33 | 34 | https://laurencewong.com/software/ 35 | 36 | Main Features 37 | ------------- 38 | 39 | * Assessment of overlap in covariate distributions 40 | * Estimation of propensity score 41 | * Improvement of covariate balance through trimming 42 | * Subclassification on propensity score 43 | * Estimation of treatment effects via matching, blocking, weighting, and least squares 44 | 45 | Dependencies 46 | ------------ 47 | 48 | * NumPy: 1.8.2 or higher 49 | * SciPy: 0.13.3 or higher 50 | 51 | Installation 52 | ------------ 53 | 54 | *Causalinference* can be installed using ``pip``: :: 55 | 56 | $ pip install causalinference 57 | 58 | For help on setting up Pip, NumPy, and SciPy on Macs, check out this excellent `guide `_. 59 | 60 | Minimal Example 61 | --------------- 62 | 63 | The following illustrates how to create an instance of CausalModel: :: 64 | 65 | >>> from causalinference import CausalModel 66 | >>> from causalinference.utils import random_data 67 | >>> Y, D, X = random_data() 68 | >>> causal = CausalModel(Y, D, X) 69 | 70 | Invoking ``help`` on ``causal`` at this point should return a comprehensive listing of all the causal analysis tools available in *Causalinference*. 71 | 72 | Detailed Documentation 73 | ---------------------- 74 | .. toctree:: 75 | :maxdepth: 3 76 | 77 | causalinference 78 | 79 | Site Navigation 80 | --------------- 81 | 82 | * :ref:`genindex` 83 | * :ref:`search` 84 | 85 | -------------------------------------------------------------------------------- /docs/tex/references.bib: -------------------------------------------------------------------------------- 1 | % This file was created with JabRef 2.7b. 2 | % Encoding: Cp1252 3 | 4 | @BOOK{vanderVaart.1998, 5 | title = {Asymptotic {Statistics}}, 6 | publisher = {Cambridge University Press}, 7 | year = {1998}, 8 | author = {{van der Vaart}, Aad W.}, 9 | owner = {alumni}, 10 | timestamp = {2010.04.30} 11 | } 12 | 13 | @BOOK{vanderVaartWellner.1996, 14 | title = {Weak {Convergence} and {Empirical} {Processes}}, 15 | publisher = {Springer}, 16 | year = {1996}, 17 | author = {{van der Vaart}, Aad W. 
and Wellner, Jon A.}, 18 | address = {New York}, 19 | owner = {alumni}, 20 | timestamp = {2010.04.30} 21 | } 22 | 23 | @ARTICLE{AbadieAngristImbens.2002, 24 | author = {Alberto Abadie and Joshua Angrist and Guido Imbens}, 25 | title = {Instrumental variables estimates of the effect of subsidized training 26 | on the quantiles of trainee earnings}, 27 | journal = {Econometrica}, 28 | year = {2002}, 29 | volume = {70}, 30 | pages = {91-117}, 31 | owner = {Laurence}, 32 | timestamp = {2010.03.02} 33 | } 34 | 35 | @ARTICLE{AbadieDiamondHainmueller.2010, 36 | author = {Alberto Abadie and Alexis Diamond and Jens Hainmueller}, 37 | title = {Synthetic control methods for comparative case studies: estimating 38 | the effect of California's tobacco control program}, 39 | journal = {Journal of American Statistical Association}, 40 | year = {2010}, 41 | volume = {105}, 42 | pages = {493-505}, 43 | owner = {Laurence}, 44 | timestamp = {2010.01.02} 45 | } 46 | 47 | @ARTICLE{AbadieGardeazabal.2003, 48 | author = {Alberto Abadie and Javier Gardeazabal}, 49 | title = {The economic costs of conflict: a case study of the Basque country}, 50 | journal = {American Economic Review}, 51 | year = {2003}, 52 | volume = {93}, 53 | pages = {113-132}, 54 | owner = {Laurence}, 55 | timestamp = {2010.01.02} 56 | } 57 | 58 | @ARTICLE{AbadieImbens.2006, 59 | author = {Alberto Abadie and Guido Imbens}, 60 | title = {Large Sample Properties of Matching Estimators for Average Treatment 61 | Effects}, 62 | journal = {Econometrica}, 63 | year = {2006}, 64 | volume = {74}, 65 | pages = {235-267}, 66 | owner = {laurence}, 67 | timestamp = {2014.10.05} 68 | } 69 | 70 | @ARTICLE{Allcott.2011, 71 | author = {Hunt Allcott}, 72 | title = {Social {Norms} and {Energy} {Conservation}}, 73 | journal = {Journal of Public Economics}, 74 | year = {2011}, 75 | volume = {95}, 76 | pages = {1082-1095}, 77 | owner = {laurencium}, 78 | timestamp = {2013.10.18} 79 | } 80 | 81 | @BOOK{Amemiya.1985, 82 | title = {Advanced econometrics}, 83 | publisher = {Harvard University Press}, 84 | year = {1985}, 85 | author = {Takeshi Amemiya}, 86 | address = {Cambridge, MA}, 87 | owner = {Laurence}, 88 | timestamp = {2010.01.01} 89 | } 90 | 91 | @ARTICLE{Andrews.1991, 92 | author = {Donald W. K. Andrews}, 93 | title = {Heteroskedasticity and autocorrelation consistent covariance matrix 94 | estimation}, 95 | journal = {Econometrica}, 96 | year = {1991}, 97 | volume = {59}, 98 | pages = {817-858}, 99 | owner = {Laurence}, 100 | timestamp = {2010.08.26} 101 | } 102 | 103 | @ARTICLE{AngristChernozhukovFernandezVal.2006, 104 | author = {Joshua Angrist and Victor Chernozhukov and Ivan Fernandez-Val}, 105 | title = {Quantile regression under misspecification, with an application to 106 | the U.S. wage structure}, 107 | journal = {Econometrica}, 108 | year = {2006}, 109 | volume = {74}, 110 | pages = {539-563}, 111 | owner = {Laurence}, 112 | timestamp = {2010.01.02} 113 | } 114 | 115 | @ARTICLE{Angrist.1990, 116 | author = {Joshua D. Angrist}, 117 | title = {Lifetime {Earnings} and the {Vietnam} {Era} {Draft} {Lottery}: {Evidence} 118 | from {Social} {Security} {Administrative} {Records}}, 119 | journal = {American Economic Review}, 120 | year = {1990}, 121 | volume = {80}, 122 | pages = {313-336}, 123 | owner = {Laurence}, 124 | timestamp = {2010.07.14} 125 | } 126 | 127 | @ARTICLE{AngristImbensRubin.1996, 128 | author = {Joshua D. Angrist and Guido W. Imbens and Donald B. 
Rubin}, 129 | title = {Identification of {Causal} {Effects} {Using} {Instrumental} {Variables}}, 130 | journal = {Journal of the American Statistical Association}, 131 | year = {1996}, 132 | volume = {91}, 133 | pages = {444-455}, 134 | owner = {Laurence}, 135 | timestamp = {2010.02.11} 136 | } 137 | 138 | @BOOK{AngristPischke.2009, 139 | title = {Mostly harmless econometrics: an empiricist's companion}, 140 | publisher = {Princeton University Press}, 141 | year = {2009}, 142 | author = {Joshua D. Angrist and Jorn-Stefeen Pischke}, 143 | owner = {Laurence}, 144 | timestamp = {2010.01.03} 145 | } 146 | 147 | @ARTICLE{Arellano.1987, 148 | author = {Manuel Arellano}, 149 | title = {Computing robust standard errors for within-in groups estimators}, 150 | journal = {Oxford Bulletin of Economics and Statistics}, 151 | year = {1987}, 152 | volume = {49}, 153 | pages = {431-434}, 154 | owner = {Laurence}, 155 | timestamp = {2010.01.08} 156 | } 157 | 158 | @ARTICLE{AtheyImbens.2006, 159 | author = {Susan Athey and Guido W. Imbens}, 160 | title = {Identification and inference in nonlinear difference-in-differences 161 | models}, 162 | journal = {Econometrica}, 163 | year = {2006}, 164 | volume = {74}, 165 | pages = {431-497}, 166 | owner = {Laurence}, 167 | timestamp = {2010.08.12} 168 | } 169 | 170 | @ARTICLE{Bai.2009, 171 | author = {Jushan Bai}, 172 | title = {Panel data models with interactive fixed effects}, 173 | journal = {Econometrica}, 174 | year = {2009}, 175 | volume = {77}, 176 | pages = {1229-1279}, 177 | owner = {Laurence}, 178 | timestamp = {2010.01.07} 179 | } 180 | 181 | @ARTICLE{Beran.1995, 182 | author = {Rudolf Beran}, 183 | title = {Stein confidence sets and the bootstrap}, 184 | journal = {Statistica Sinica}, 185 | year = {1995}, 186 | volume = {5}, 187 | pages = {109-127}, 188 | owner = {Laurence}, 189 | timestamp = {2011.03.16} 190 | } 191 | 192 | @ARTICLE{Berry.1994, 193 | author = {Steven T. Berry}, 194 | title = {Estimating discrete-choice models of product differentiation}, 195 | journal = {RAND Journal of Economics}, 196 | year = {1994}, 197 | volume = {25}, 198 | pages = {242-262}, 199 | owner = {Laurence}, 200 | timestamp = {2010.01.01} 201 | } 202 | 203 | @ARTICLE{BerryLevinsohnPakes.1995, 204 | author = {Steven T. Berry and James Levinsohn and Ariel Pakes}, 205 | title = {Automobile prices in market equilibrium}, 206 | journal = {Econometrica}, 207 | year = {1995}, 208 | volume = {63}, 209 | pages = {841-890}, 210 | owner = {Laurence}, 211 | timestamp = {2010.01.01} 212 | } 213 | 214 | @ARTICLE{BertrandDufloMullainathan.2004, 215 | author = {Marianne Bertrand and Esther Duflo and Sendhil Mullainathan}, 216 | title = {How much should we trust differences-in-differences estimates?}, 217 | journal = {Quarterly Journal of Economics}, 218 | year = {2004}, 219 | volume = {119}, 220 | pages = {249-275}, 221 | owner = {Laurence}, 222 | timestamp = {2010.02.11} 223 | } 224 | 225 | @ARTICLE{BickelRitovTsybakov.2009, 226 | author = {Peter J. Bickel and Ya'acov Ritov and Alexandre B. 
Tsybakov}, 227 | title = {Simultaneous analysis of lasso and Dantzig selector}, 228 | journal = {Annals of Statistics}, 229 | year = {2009}, 230 | volume = {37}, 231 | pages = {1705-1732}, 232 | owner = {Laurence}, 233 | timestamp = {2011.03.16} 234 | } 235 | 236 | @BOOK{Billingsley.1995, 237 | title = {Probability and measure}, 238 | publisher = {Wiley}, 239 | year = {1995}, 240 | author = {Patrick Billingsley}, 241 | address = {New York}, 242 | edition = {3rd}, 243 | owner = {alumni}, 244 | timestamp = {2010.04.30} 245 | } 246 | 247 | @ARTICLE{BitlerGelbachHoynes.2006, 248 | author = {Marianne P. Bitler and Jonah B. Gelbach and Hilary W. Hoynes}, 249 | title = {What mean impacts miss: distributional effects of welfare reform 250 | experiments}, 251 | journal = {American Economic Review}, 252 | year = {2006}, 253 | volume = {96}, 254 | pages = {988-1012}, 255 | owner = {Laurence}, 256 | timestamp = {2010.03.06} 257 | } 258 | 259 | @BOOK{BoydVandenberghe.2004, 260 | title = {Convex {Optimization}}, 261 | publisher = {Cambridge University Press}, 262 | year = {2004}, 263 | author = {Stephen Boyd and Lieven Vandenberghe}, 264 | owner = {laurencium}, 265 | timestamp = {2013.11.14} 266 | } 267 | 268 | @BOOK{CameronTrivedi.2005, 269 | title = {Microeconometrics: methods and applications}, 270 | publisher = {Cambridge University Press}, 271 | year = {2005}, 272 | author = {A. Colin Cameron and Pravin K. Trivedi}, 273 | owner = {Laurence}, 274 | timestamp = {2010.01.01} 275 | } 276 | 277 | @BOOK{CampbellLoMacKinlay.1997, 278 | title = {The econometrics of financial markets}, 279 | publisher = {Princeton University Press}, 280 | year = {1997}, 281 | author = {John Y. Campbell and Andrew W. Lo and A. Craig MacKinlay}, 282 | owner = {Laurence}, 283 | timestamp = {2010.01.05} 284 | } 285 | 286 | @ARTICLE{CandesTao.2007, 287 | author = {Emmanuel Candes and Terence Tao}, 288 | title = {The Dantzig selector: statisical estimation when $p$ is much larger 289 | than $n$}, 290 | journal = {Annals of Statistics}, 291 | year = {2007}, 292 | volume = {35}, 293 | pages = {2313-2351}, 294 | owner = {Laurence}, 295 | timestamp = {2011.03.16} 296 | } 297 | 298 | @BOOK{CapinskiKopp.2004, 299 | title = {Measure, integral and probability}, 300 | publisher = {Springer}, 301 | year = {2004}, 302 | author = {Capi\'{n}ski, Marek and Kopp, Ekkehard}, 303 | address = {London}, 304 | edition = {2nd}, 305 | owner = {alumni}, 306 | timestamp = {2010.04.30} 307 | } 308 | 309 | @ARTICLE{CardHyslop.2005, 310 | author = {David Card and Dean R. Hyslop}, 311 | title = {Estimating the {Effects} of a {Time-limited} {Earnings} {Subsidy} 312 | for {Welfare-leavers}}, 313 | journal = {Econometrica}, 314 | year = {2005}, 315 | volume = {73}, 316 | pages = {1723-1770}, 317 | owner = {laurencium}, 318 | timestamp = {2013.10.18} 319 | } 320 | 321 | @ARTICLE{CardKrueger.1994, 322 | author = {David Card and Alan B. Krueger}, 323 | title = {Minimum Wages and Employment: A Case Study of the Fast-Food Industry 324 | in New Jersey and Pennsylvania}, 325 | journal = {American Economic Review}, 326 | year = {1994}, 327 | volume = {84}, 328 | pages = {772-793}, 329 | owner = {laurence}, 330 | timestamp = {2014.10.25} 331 | } 332 | 333 | @ARTICLE{CardMcCall.1996, 334 | author = {David Card and Brain P. McCall}, 335 | title = {Is {Workers'} {Compensation} {Covering} {Uninsured} {Medical} {Costs}? 
336 | Evidence from the {`Monday Effect'}}, 337 | journal = {Industrial and Labor Relations Review}, 338 | year = {1996}, 339 | volume = {49}, 340 | pages = {690-706}, 341 | owner = {laurencium}, 342 | timestamp = {2013.10.18} 343 | } 344 | 345 | @ARTICLE{ChernozhukovHansen.2008, 346 | author = {Victor Chernozhukov and Christian Hansen}, 347 | title = {Instrumental variable quantile regressoin: a robust inference approach}, 348 | journal = {Journal of Econometrics}, 349 | year = {2008}, 350 | volume = {142}, 351 | pages = {379-398}, 352 | owner = {Laurence}, 353 | timestamp = {2010.01.01} 354 | } 355 | 356 | @ARTICLE{ChernozhukovHansen.2006, 357 | author = {Victor Chernozhukov and Christian Hansen}, 358 | title = {Instrumental quantile regression inference for structural and treatment 359 | effect models}, 360 | journal = {Journal of Econometrics}, 361 | year = {2006}, 362 | volume = {132}, 363 | pages = {491-525}, 364 | owner = {Laurence}, 365 | timestamp = {2010.01.15} 366 | } 367 | 368 | @ARTICLE{ChernozhukovHansen.2005, 369 | author = {Victor Chernozhukov and Christian Hansen}, 370 | title = {An IV model of quantile treatment effects}, 371 | journal = {Econometrica}, 372 | year = {2005}, 373 | volume = {73}, 374 | pages = {245-261}, 375 | owner = {Laurence}, 376 | timestamp = {2010.01.01} 377 | } 378 | 379 | @ARTICLE{ChernozhukovHansen.2004, 380 | author = {Victor Chernozhukov and Christian Hansen}, 381 | title = {The effects of 401(k) participation on the wealth distribution: an 382 | instrumental quantile regression analysis}, 383 | journal = {Review of Economics and Statistics}, 384 | year = {2004}, 385 | volume = {86}, 386 | pages = {735-751}, 387 | owner = {Laurence}, 388 | timestamp = {2010.03.06} 389 | } 390 | 391 | @BOOK{CLRS.2009, 392 | title = {Introduction to Algorithms}, 393 | publisher = {MIT Press}, 394 | year = {2009}, 395 | author = {Thomas H. Cormen and Charles E. Leiserson and Ronald L. Rivest and 396 | Clifford Stein}, 397 | owner = {laurence}, 398 | timestamp = {2014.10.19} 399 | } 400 | 401 | @ARTICLE{CrumpHotzImbensMitnik.2009, 402 | author = {Crump, R. and Hotz, V. J. and Imbens, G. and Mitnik, O.}, 403 | title = {Dealing with Limited Overlap in Estimation of Average Treatment Effects}, 404 | journal = {Biometrika}, 405 | year = {2009}, 406 | volume = {96}, 407 | pages = {187-199}, 408 | owner = {laurence}, 409 | timestamp = {2015.08.30} 410 | } 411 | 412 | @BOOK{Csorgo.1983, 413 | title = {Quantile processes with statistical applications}, 414 | publisher = {SIAM}, 415 | year = {1983}, 416 | author = {Mikl\'{o}s Cs\"{o}rg\H{o}}, 417 | series = {CBMS-NSF Regional Conference Series in Applied Mathematics 42}, 418 | address = {Philadelphia}, 419 | owner = {laurence}, 420 | timestamp = {2012.10.29} 421 | } 422 | 423 | @BOOK{Davidson.1994, 424 | title = {Stochastic limit theory}, 425 | publisher = {Oxford University Press}, 426 | year = {1994}, 427 | author = {James Davidson}, 428 | owner = {alumni}, 429 | timestamp = {2010.04.30} 430 | } 431 | 432 | @BOOK{DavidsonMacKinnon.2004, 433 | title = {Econometric theory and methods}, 434 | publisher = {Oxford University Press}, 435 | year = {2004}, 436 | author = {Davidson, Russell and MacKinnon, James Gordon} 437 | } 438 | 439 | @BOOK{DavidsonMacKinnon.1993, 440 | title = {Estimation and inference in econometrics}, 441 | publisher = {Oxford University Press}, 442 | year = {1993}, 443 | author = {Davidson, Russell and MacKinnon, James Gordon} 444 | } 445 | 446 | @ARTICLE{DehejiaWahba.2002, 447 | author = {Rajeev H. 
Dehejia and Sadek Wahba}, 448 | title = {Propensity score-matching methods for nonexperimental causal studies}, 449 | journal = {Review of Economics and Statistics}, 450 | year = {2002}, 451 | volume = {84}, 452 | pages = {151-161}, 453 | owner = {Laurence}, 454 | timestamp = {2010.01.02} 455 | } 456 | 457 | @ARTICLE{DehejiaWahba.1999, 458 | author = {Rajeev H. Dehejia and Sadek Wahba}, 459 | title = {Causal {Effects} in {Nonexperimental} {Studies}: {Reevaluating} the 460 | {Evaluation} of {Training} {Programs}}, 461 | journal = {Journal of the American Statistical Association}, 462 | year = {1999}, 463 | volume = {94}, 464 | pages = {1053-1062}, 465 | owner = {Laurence}, 466 | timestamp = {2010.01.02} 467 | } 468 | 469 | @ARTICLE{DonaldHsu.2011, 470 | author = {Stephen G. Donald and Yu-Chin Hsu}, 471 | title = {Estimation and {Inference} for {Distribution} {Functions} and {Quantile} 472 | {Functions} in {Treatment} {Effect} {Models}}, 473 | journal = {Unpublished manuscript}, 474 | year = {2011}, 475 | owner = {laurencium}, 476 | timestamp = {2013.10.20} 477 | } 478 | 479 | @ARTICLE{DonaldLang.2007, 480 | author = {Stephen G. Donald and Kevin Lang}, 481 | title = {Inference with difference-in-differences and other panel data}, 482 | journal = {Review of Economics and Statistics}, 483 | year = {2007}, 484 | volume = {89}, 485 | pages = {221-233}, 486 | owner = {Laurence}, 487 | timestamp = {2010.01.19} 488 | } 489 | 490 | @ARTICLE{DonohoJohnstoneKerykyacharianPicard.1995, 491 | author = {David L. Donoho and Iain M. Johnstone and Gerard Kerkyacharian and 492 | Dominique Picard}, 493 | title = {Wavelet shrinkage: asymptopia?}, 494 | journal = {Journal of the Royal Statistical Society, Series B}, 495 | year = {1995}, 496 | volume = {57}, 497 | pages = {301-369}, 498 | owner = {Laurence}, 499 | timestamp = {2011.03.16} 500 | } 501 | 502 | @ARTICLE{Efron.1979, 503 | author = {Efron, Bradley}, 504 | title = {Bootstrapping methods: Another look at the jackknife}, 505 | journal = {Annals of Statistics}, 506 | year = {1979}, 507 | volume = {7}, 508 | pages = {1--26} 509 | } 510 | 511 | @ARTICLE{EfronHastieJohnstoneTibshirani.2004, 512 | author = {Bradley Efron and Trevor Hastie and Iain M. Johnstone and Robert 513 | Tibshirani}, 514 | title = {Least angle regression}, 515 | journal = {Annals of Statistics}, 516 | year = {2004}, 517 | volume = {32}, 518 | pages = {407-499}, 519 | owner = {Laurence}, 520 | timestamp = {2011.03.16} 521 | } 522 | 523 | @ARTICLE{EngleGranger.1987, 524 | author = {Robert F. Engle and Clive W. J. 
Granger}, 525 | title = {Co-integration and error correction: respresentation, estimation 526 | and testing}, 527 | journal = {Econometrica}, 528 | year = {1987}, 529 | volume = {55}, 530 | pages = {251-276}, 531 | owner = {Laurence}, 532 | timestamp = {2011.06.09} 533 | } 534 | 535 | @ARTICLE{FanPark.2009, 536 | author = {Yanqin Fan and Sang Soo Park}, 537 | title = {Sharp {Bounds} on the {Distribution} of {Treatment} {Effects} and 538 | {Their} {Statistical} {Inference}}, 539 | journal = {Econometric Theory}, 540 | year = {2009}, 541 | volume = {26}, 542 | pages = {1-21}, 543 | owner = {Laurence}, 544 | timestamp = {2010.02.20} 545 | } 546 | 547 | @ARTICLE{Firpo.2007, 548 | author = {Sergio Firpo}, 549 | title = {Efficient {Semiparametric} {Estimation} of {Quantile} {Treatment} 550 | {Effects}}, 551 | journal = {Econometrica}, 552 | year = {2007}, 553 | volume = {75}, 554 | pages = {259-276}, 555 | owner = {Laurence}, 556 | timestamp = {2010.01.01} 557 | } 558 | 559 | @ARTICLE{FirpoRidder.2008, 560 | author = {Firpo, Sergio and Ridder, Geert}, 561 | title = {Bounds on {Functionals} of the {Distribution} of {Treatment} {Effects}}, 562 | journal = {IEPR Working Paper}, 563 | year = {2008}, 564 | owner = {laurencium}, 565 | timestamp = {2013.11.14} 566 | } 567 | 568 | @ARTICLE{FrischWaugh.1933, 569 | author = {Frisch, Ragnar and Waugh, Frederick V.}, 570 | title = {Partial time regressions as compared with individual trends}, 571 | journal = {Econometrica}, 572 | year = {1933}, 573 | volume = {1}, 574 | pages = {387-401} 575 | } 576 | 577 | @ARTICLE{GautierSiegmannVanVuuren.2009, 578 | author = {Pieter A. Gautier and Arjen Siegmann and Aico {Van Vuuren}}, 579 | title = {Terrorism and attitudes towards minorities: the effect of the Theo 580 | van Gogh murder on house prices in Amsterdam}, 581 | journal = {Journal of Urban Economics}, 582 | year = {2009}, 583 | volume = {65}, 584 | pages = {113-126}, 585 | owner = {Laurence}, 586 | timestamp = {2011.02.20} 587 | } 588 | 589 | @BOOK{Goldberger.1991, 590 | title = {A course in econometrics}, 591 | publisher = {Harvard University Press}, 592 | year = {1991}, 593 | author = {Arthur S. Goldberger}, 594 | address = {Cambridge, MA}, 595 | owner = {Laurence}, 596 | timestamp = {2010.01.01} 597 | } 598 | 599 | @BOOK{Greene.2002, 600 | title = {Econometric analysis}, 601 | publisher = {Prentice-Hall}, 602 | year = {2002}, 603 | author = {Greene, William H.}, 604 | address = {New York}, 605 | edition = {5th} 606 | } 607 | 608 | @ARTICLE{Hahn.1998, 609 | author = {Jinyong Hahn}, 610 | title = {On the role of the propensity score in efficient semiparametric estimation 611 | of average treatment effects}, 612 | journal = {Econometrica}, 613 | year = {1998}, 614 | volume = {66}, 615 | pages = {315-331}, 616 | owner = {Laurence}, 617 | timestamp = {2010.01.02} 618 | } 619 | 620 | @ARTICLE{HahnToddvanderKlaauw.2001, 621 | author = {Hahn, Jinyong and Todd, Petra and {van der Klaauw}, Wilbert}, 622 | title = {Identification and {Estimation} of {Treatment} {Effects} with a {Regression-Discontinuity} 623 | {Design}}, 624 | journal = {Econometrica}, 625 | year = {2001}, 626 | volume = {69}, 627 | pages = {201-209}, 628 | owner = {Laurence}, 629 | timestamp = {2010.02.02} 630 | } 631 | 632 | @BOOK{Hamilton.1994, 633 | title = {Time series analysis}, 634 | publisher = {Princeton University Press}, 635 | year = {1994}, 636 | author = {Hamilton, James D.}, 637 | address = {Princeton} 638 | } 639 | 640 | @ARTICLE{HanHausman.1990, 641 | author = {Aaron Han and Jerry A. 
Hausman}, 642 | title = {Flexible parametric estimation of duration and competing risk models}, 643 | journal = {Journal of Applied Econometrics}, 644 | year = {1990}, 645 | volume = {5}, 646 | pages = {1-28}, 647 | owner = {Laurence}, 648 | timestamp = {2010.02.20} 649 | } 650 | 651 | @ARTICLE{Hansen.1982, 652 | author = {Hansen, Lars Peter}, 653 | title = {Large sample properties of generalized method of moments estimators}, 654 | journal = {Econometrica}, 655 | year = {1982}, 656 | volume = {50}, 657 | pages = {1029--1054} 658 | } 659 | 660 | @BOOK{HastieTibshiraniFriedman.2009, 661 | title = {The Elements of Statistical Learning}, 662 | publisher = {Springer}, 663 | year = {2009}, 664 | author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome}, 665 | owner = {laurence}, 666 | timestamp = {2014.10.05} 667 | } 668 | 669 | @ARTICLE{Hausman.1985, 670 | author = {Jerry A. Hausman}, 671 | title = {The econometrics of nonlinear budget sets}, 672 | journal = {Econometrica}, 673 | year = {1985}, 674 | volume = {53}, 675 | pages = {1255-1282}, 676 | owner = {Laurence}, 677 | timestamp = {2010.01.01} 678 | } 679 | 680 | @ARTICLE{HausmanTaylor.1981, 681 | author = {Jerry A. Hausman and William E. Taylor}, 682 | title = {Panel data and unobservable individual effects}, 683 | journal = {Econometrica}, 684 | year = {1981}, 685 | volume = {49}, 686 | pages = {1377-1398}, 687 | owner = {Laurence}, 688 | timestamp = {2010.01.01} 689 | } 690 | 691 | @ARTICLE{HausmanWise.1978, 692 | author = {Jerry A. Hausman and David A. Wise}, 693 | title = {A conditional probit model for qualitative choice: discrete decisions 694 | recognizing interdependence and heterogeneous preferences}, 695 | journal = {Econometrica}, 696 | year = {1978}, 697 | volume = {46}, 698 | pages = {403-426}, 699 | owner = {Laurence}, 700 | timestamp = {2010.02.18} 701 | } 702 | 703 | @BOOK{Hayashi.2000, 704 | title = {Econometrics}, 705 | publisher = {Princeton University Press}, 706 | year = {2000}, 707 | author = {Hayashi, Fumio}, 708 | address = {Princeton} 709 | } 710 | 711 | @ARTICLE{Heckman.1979, 712 | author = {James J. Heckman}, 713 | title = {Sample selection bias as a specification error}, 714 | journal = {Econometrica}, 715 | year = {1979}, 716 | volume = {47}, 717 | pages = {153-161}, 718 | owner = {Laurence}, 719 | timestamp = {2010.01.01} 720 | } 721 | 722 | @ARTICLE{Hinrichs.2011, 723 | author = {Peter Hinrichs}, 724 | title = {The effects of affirmative action bans on college enrollment, educational 725 | attainment, and the demographic composition of universities}, 726 | journal = {Review of Economics and Statistics}, 727 | year = {2011}, 728 | volume = {forthcoming}, 729 | owner = {Laurence}, 730 | timestamp = {2011.02.20} 731 | } 732 | 733 | @ARTICLE{HiranoImbens.2001, 734 | author = {Keisuke Hirano and Guido W. Imbens}, 735 | title = {Estimation of causal effects using propensity score weighting: an 736 | application to data on right heart catheterization}, 737 | journal = {Health Services and Outcomes Research Methodology}, 738 | year = {2001}, 739 | volume = {2}, 740 | pages = {259-278}, 741 | owner = {Laurence}, 742 | timestamp = {2010.07.23} 743 | } 744 | 745 | @ARTICLE{HiranoImbensRidder.2003, 746 | author = {Keisuke Hirano and Guido W. 
Imbens and Geert Ridder}, 747 | title = {Efficient {Estimation} of {Average} {Treatment} {Effects} {Using} 748 | the {Estimated} {Propensity} {Score}}, 749 | journal = {Econometrica}, 750 | year = {2003}, 751 | volume = {71}, 752 | pages = {1161-1189}, 753 | owner = {Laurence}, 754 | timestamp = {2010.01.02} 755 | } 756 | 757 | @ARTICLE{HoerlKennard.1970, 758 | author = {Arthur E. Hoerl and Robert W. Kennard}, 759 | title = {Ridge regression: biased estimation for nonorthogonal problems}, 760 | journal = {Technometrics}, 761 | year = {1970}, 762 | volume = {12}, 763 | pages = {55-67}, 764 | owner = {Laurence}, 765 | timestamp = {2011.05.10} 766 | } 767 | 768 | @ARTICLE{Holland.1986, 769 | author = {Paul W. Holland}, 770 | title = {Statistics and {Causal} {Inference}}, 771 | journal = {Journal of American Statistical Association}, 772 | year = {1986}, 773 | volume = {81}, 774 | pages = {945-960}, 775 | owner = {laurence}, 776 | timestamp = {2013.09.19} 777 | } 778 | 779 | @ARTICLE{Horowitz.1999, 780 | author = {Joel L. Horowitz}, 781 | title = {Semiparametric estimation of a proportional hazard model with unobserved 782 | heterogeneity}, 783 | journal = {Econometrica}, 784 | year = {1999}, 785 | volume = {67}, 786 | pages = {1001-1028}, 787 | owner = {Laurence}, 788 | timestamp = {2010.09.22} 789 | } 790 | 791 | @ARTICLE{Horowitz.1992, 792 | author = {Joel L. Horowitz}, 793 | title = {A smoothed maximum score estimator for the binary response model}, 794 | journal = {Econometrica}, 795 | year = {1992}, 796 | volume = {60}, 797 | pages = {505-531}, 798 | owner = {Laurence}, 799 | timestamp = {2010.09.22} 800 | } 801 | 802 | @ARTICLE{HotzImbensMortimer.2005, 803 | author = {V. Joseph Hotz and Guido W. Imbens and Julie H. Mortimer}, 804 | title = {Predicting the {Efficacy} of {Future} {Training} {Programs} {Using} 805 | {Past} {Experiences} at {Other} {Locations}}, 806 | journal = {Journal of Econometrics}, 807 | year = {2005}, 808 | volume = {125}, 809 | pages = {241-270}, 810 | owner = {laurencium}, 811 | timestamp = {2013.10.18} 812 | } 813 | 814 | @ARTICLE{HuangLiuPourahmadiLiu.2006, 815 | author = {Jianhua Z. Huang and Naiping Liu and Mohsen Pourahmadi and Linxu 816 | Liu}, 817 | title = {Covariance matrix selection and estimation via penalised normal likelihood}, 818 | journal = {Biometrika}, 819 | year = {2006}, 820 | volume = {93}, 821 | pages = {85-98}, 822 | owner = {Laurence}, 823 | timestamp = {2011.03.16} 824 | } 825 | 826 | @ARTICLE{Ichimura.1993, 827 | author = {Hidehiko Ichimura}, 828 | title = {Semiparametric least squares (SLS) and weighted SLS estimation of 829 | single-index models}, 830 | journal = {Journal of Econometrics}, 831 | year = {1993}, 832 | volume = {58}, 833 | pages = {71-120}, 834 | owner = {Laurence}, 835 | timestamp = {2010.01.01} 836 | } 837 | 838 | @ARTICLE{Imbens.2004, 839 | author = {Guido W. Imbens}, 840 | title = {Nonparametric estimation of average treatment effects under exogeneity: 841 | a review}, 842 | journal = {Review of Economics and Statistics}, 843 | year = {2004}, 844 | volume = {86}, 845 | pages = {4-29}, 846 | owner = {Laurence}, 847 | timestamp = {2010.07.23} 848 | } 849 | 850 | @ARTICLE{ImbensAngrist.1994, 851 | author = {Guido W. Imbens and Joshua D. 
Angrist}, 852 | title = {Identification and {Estimation} of {Local} {Average} {Treatment} 853 | {Effects}}, 854 | journal = {Econometrica}, 855 | year = {1994}, 856 | volume = {62}, 857 | pages = {467-475}, 858 | owner = {Laurence}, 859 | timestamp = {2010.01.01} 860 | } 861 | 862 | @ARTICLE{ImbensLancaster.1996, 863 | author = {Guido W. Imbens and Tony Lancaster}, 864 | title = {Efficient estimation and stratified sampling}, 865 | journal = {Journal of Econometrics}, 866 | year = {1996}, 867 | volume = {74}, 868 | pages = {289-318}, 869 | owner = {Laurence}, 870 | timestamp = {2010.02.08} 871 | } 872 | 873 | @ARTICLE{ImbensLemieux.2008, 874 | author = {Guido W. Imbens and Thomas Lemieux}, 875 | title = {Regression discontinuity designs: a guide to practice}, 876 | journal = {Journal of Econometrics}, 877 | year = {2008}, 878 | volume = {142}, 879 | pages = {615-635}, 880 | owner = {Laurence}, 881 | timestamp = {2010.01.15} 882 | } 883 | 884 | @ARTICLE{ImbensManski.2004, 885 | author = {Guido W. Imbens and Charles F. Manski}, 886 | title = {Confidence {Intervals} for {Partially} {Identified} {Parameters}}, 887 | journal = {Econometrica}, 888 | year = {2004}, 889 | volume = {72}, 890 | pages = {1845-1857}, 891 | owner = {laurencium}, 892 | timestamp = {2013.10.08} 893 | } 894 | 895 | @BOOK{ImbensRubin.2015, 896 | title = {Causal Inference in Statistics, Social, and Biomedical Sciences: 897 | An Introduction}, 898 | publisher = {Cambridge University Press}, 899 | year = {2015}, 900 | author = {Guido W. Imbens and Donald B. Rubin}, 901 | owner = {laurence}, 902 | timestamp = {2014.10.26} 903 | } 904 | 905 | @ARTICLE{ImbensWooldridge.2009, 906 | author = {Guido W. Imbens and Jeffrey M. Wooldridge}, 907 | title = {Recent developments in the econometrics of program evaluation}, 908 | journal = {Journal of Economic Literature}, 909 | year = {2009}, 910 | volume = {47}, 911 | pages = {5-86}, 912 | owner = {Laurence}, 913 | timestamp = {2010.01.15} 914 | } 915 | 916 | @ARTICLE{Keele.2009, 917 | author = {Luke Keele}, 918 | title = {An observational study of ballot initiatives and state outcomes}, 919 | journal = {Working paper}, 920 | year = {2009}, 921 | owner = {Laurence}, 922 | timestamp = {2011.02.20} 923 | } 924 | 925 | @BOOK{Kennedy.2003, 926 | title = {A guide to econometrics}, 927 | publisher = {MIT Press}, 928 | year = {2003}, 929 | author = {Kennedy, Peter E.}, 930 | address = {Cambridge}, 931 | edition = {5th} 932 | } 933 | 934 | @ARTICLE{KitamuraStutzer.1997, 935 | author = {Yuichi Kitamura and Michael Stutzer}, 936 | title = {An information-theoretic alternative to generalized method of moments 937 | estimation}, 938 | journal = {Econometrica}, 939 | year = {1997}, 940 | volume = {65}, 941 | pages = {861-874}, 942 | owner = {Laurence}, 943 | timestamp = {2010.01.01} 944 | } 945 | 946 | @BOOK{Koenker.2005, 947 | title = {Quantile {Regression}}, 948 | publisher = {Cambridge University Press}, 949 | year = {2005}, 950 | author = {Roger Koenker}, 951 | owner = {Laurence}, 952 | timestamp = {2010.01.01} 953 | } 954 | 955 | @ARTICLE{KoenkerBassett.1978, 956 | author = {Roger Koenker and Gilbert Bassett}, 957 | title = {Regression quantiles}, 958 | journal = {Econometrica}, 959 | year = {1978}, 960 | volume = {46}, 961 | pages = {33-50}, 962 | owner = {Laurence}, 963 | timestamp = {2010.01.01} 964 | } 965 | 966 | @ARTICLE{LaLonde.1986, 967 | author = {Robert J. 
{LaLonde}}, 968 | title = {Evaluating the Econometric Evaluations of Training Programs with 969 | Experimental Data}, 970 | journal = {American Economic Review}, 971 | year = {1986}, 972 | volume = {76}, 973 | pages = {604-620}, 974 | owner = {laurencium}, 975 | timestamp = {2013.11.14} 976 | } 977 | 978 | @BOOK{Lancaster.1990, 979 | title = {The econometrics analysis of transition data}, 980 | publisher = {Cambridge University Press}, 981 | year = {1990}, 982 | author = {Tony Lancaster}, 983 | owner = {Laurence}, 984 | timestamp = {2010.02.18} 985 | } 986 | 987 | @ARTICLE{Lancaster.1979, 988 | author = {Tony Lancaster}, 989 | title = {Econometric methods for the duration of unemployment}, 990 | journal = {Econometrica}, 991 | year = {1979}, 992 | volume = {47}, 993 | pages = {939-956}, 994 | owner = {Laurence}, 995 | timestamp = {2010.02.22} 996 | } 997 | 998 | @ARTICLE{Lechner.2002, 999 | author = {Michael Lechner}, 1000 | title = {Program {Heterogeneity} and {Propensity} {Score} {Matching}: {An} 1001 | {Application} to the {Evaluation} of {Active} {Labor} {Market} {Policies}}, 1002 | journal = {Review of Economics and Statistics}, 1003 | year = {2002}, 1004 | volume = {84}, 1005 | pages = {205-220}, 1006 | owner = {laurencium}, 1007 | timestamp = {2013.10.18} 1008 | } 1009 | 1010 | @ARTICLE{LedoitWolf.2004, 1011 | author = {Olivier Ledoit and Michael Wolf}, 1012 | title = {A well-conditioned estimator for large-dimensional covariance matrices}, 1013 | journal = {Journal of Multivariate Analysis}, 1014 | year = {2004}, 1015 | volume = {88}, 1016 | pages = {365-411}, 1017 | owner = {Laurence}, 1018 | timestamp = {2011.03.16} 1019 | } 1020 | 1021 | @BOOK{LehmannRomano.2005, 1022 | title = {Testing statistical hypothesis}, 1023 | publisher = {Springer}, 1024 | year = {2005}, 1025 | author = {E. L. Lehmann and Joseph P. Romano}, 1026 | address = {New York}, 1027 | edition = {3rd}, 1028 | owner = {alumni}, 1029 | timestamp = {2010.04.30} 1030 | } 1031 | 1032 | @ARTICLE{Lovell.1963, 1033 | author = {Lovell, Michael C.}, 1034 | title = {Seasonal adjustment of economic time series}, 1035 | journal = {Journal of the American Statistical Association}, 1036 | year = {1963}, 1037 | volume = {58}, 1038 | pages = {993--1010} 1039 | } 1040 | 1041 | @ARTICLE{MacKinnonWhite.1985, 1042 | author = {MacKinnon, James Gordon and White, Halbert}, 1043 | title = {Some heteroskedasticity consistent covariance matrix estimators with 1044 | improved finite sample properties}, 1045 | journal = {Journal of Econometrics}, 1046 | year = {1985}, 1047 | volume = {29}, 1048 | pages = {305-325} 1049 | } 1050 | 1051 | @ARTICLE{Makarov.1981, 1052 | author = {Makarov, G. D.}, 1053 | title = {Estimates for the {Distribution} {Function} of a {Sum} of {Two} {Random} 1054 | {Variables} when the {Marginal} {Distributions} are {Fixed}}, 1055 | journal = {Theory of Probability and its Applications}, 1056 | year = {1981}, 1057 | volume = {26}, 1058 | pages = {803-806}, 1059 | owner = {laurencium}, 1060 | timestamp = {2013.11.14} 1061 | } 1062 | 1063 | @BOOK{Manski.2007, 1064 | title = {{Identification for Prediction and Decision}}, 1065 | publisher = {Harvard University Press}, 1066 | year = {2007}, 1067 | author = {Charles F. Manski}, 1068 | address = {MA}, 1069 | owner = {laurencium}, 1070 | timestamp = {2013.11.15} 1071 | } 1072 | 1073 | @BOOK{Manski.2003, 1074 | title = {Partial {Identification} of {Probability} {Distributions}}, 1075 | publisher = {Springer-Verlag}, 1076 | year = {2003}, 1077 | author = {Charles F. 
Manski}, 1078 | address = {New York}, 1079 | owner = {laurencium}, 1080 | timestamp = {2013.11.15} 1081 | } 1082 | 1083 | @ARTICLE{Manski.1985, 1084 | author = {Charles F. Manski}, 1085 | title = {Semiparametric analysis of discrete response: asymptotic properties 1086 | of the maximum score estimator}, 1087 | journal = {Journal of Econometrics}, 1088 | year = {1985}, 1089 | volume = {27}, 1090 | pages = {313-333}, 1091 | owner = {Laurence}, 1092 | timestamp = {2010.01.01} 1093 | } 1094 | 1095 | @ARTICLE{Manski.1975, 1096 | author = {Charles F. Manski}, 1097 | title = {Maximum score estimation of the stochastic utility model of choice}, 1098 | journal = {Journal of Econometrics}, 1099 | year = {1975}, 1100 | volume = {3}, 1101 | pages = {205-228}, 1102 | owner = {Laurence}, 1103 | timestamp = {2010.01.01} 1104 | } 1105 | 1106 | @BOOK{MardiaKentBibby.1979, 1107 | title = {Multivariate Analysis}, 1108 | publisher = {Academic Press}, 1109 | year = {1979}, 1110 | author = {Mardia, K. T. and Kent, J. T. and Bibby, J. M.}, 1111 | address = {London}, 1112 | owner = {alumni}, 1113 | timestamp = {2010.04.30} 1114 | } 1115 | 1116 | @ARTICLE{McFadden.1989, 1117 | author = {Daniel McFadden}, 1118 | title = {A method of simulated moments for estimation of discrete response 1119 | models without numerical integration}, 1120 | journal = {Econometrica}, 1121 | year = {1989}, 1122 | volume = {57}, 1123 | pages = {995-1026}, 1124 | owner = {Laurence}, 1125 | timestamp = {2010.01.01} 1126 | } 1127 | 1128 | @ARTICLE{McKeeRivkinSims.2010, 1129 | author = {Graham J. McKee and Steven G. Rivkin and Katherine R. E. Sims}, 1130 | title = {Disruption, {Achievement} and the {Heterogeneous} {Benefits} of {Smaller} 1131 | {Classes}}, 1132 | journal = {NBER Working Paper No. 15812}, 1133 | year = {2010}, 1134 | owner = {laurencium}, 1135 | timestamp = {2013.10.18} 1136 | } 1137 | 1138 | @ARTICLE{Meyer.1990, 1139 | author = {Bruce D. Meyer}, 1140 | title = {Unemployment insurance and unemployment spells}, 1141 | journal = {Econometrica}, 1142 | year = {1990}, 1143 | volume = {58}, 1144 | pages = {757-782}, 1145 | owner = {Laurence}, 1146 | timestamp = {2010.02.20} 1147 | } 1148 | 1149 | @ARTICLE{Newey.1994, 1150 | author = {Whitney K. Newey}, 1151 | title = {The asymptotic variance of semiparametric estimators}, 1152 | journal = {Econometrica}, 1153 | year = {1994}, 1154 | volume = {62}, 1155 | pages = {1349-1382}, 1156 | owner = {Laurence}, 1157 | timestamp = {2010.01.01} 1158 | } 1159 | 1160 | @ARTICLE{Newey.1991, 1161 | author = {Whitney K. Newey}, 1162 | title = {Uniform convergence in probability and stochastic equicontinuity}, 1163 | journal = {Econometrica}, 1164 | year = {1991}, 1165 | volume = {59}, 1166 | pages = {1161-1167}, 1167 | owner = {Laurence}, 1168 | timestamp = {2010.09.13} 1169 | } 1170 | 1171 | @ARTICLE{Newey.1990, 1172 | author = {Whitney K. Newey}, 1173 | title = {Semiparametric efficiency bounds}, 1174 | journal = {Journal of Applied Econometrics}, 1175 | year = {1990}, 1176 | volume = {5}, 1177 | pages = {99-135}, 1178 | owner = {Laurence}, 1179 | timestamp = {2010.01.01} 1180 | } 1181 | 1182 | @INCOLLECTION{NeweyMcFadden.1994, 1183 | author = {Whitney K. Newey and Daniel McFadden}, 1184 | title = {Large sample estimation and hypothesis testing}, 1185 | booktitle = {Handbook of Econometrics}, 1186 | publisher = {North-Holland}, 1187 | year = {1994}, 1188 | editor = {Robert F. 
Engle and Daniel McFadden}, 1189 | volume = {4}, 1190 | pages = {2111-2245}, 1191 | owner = {Laurence}, 1192 | timestamp = {2010.01.01} 1193 | } 1194 | 1195 | @ARTICLE{NeweyWest.1987, 1196 | author = {Newey, Whitney K. and West, Kenneth D.}, 1197 | title = {A simple, positive semi-definite, heteroskedasticity and autocorrelation 1198 | consistent covariance matrix}, 1199 | journal = {Econometrica}, 1200 | year = {1987}, 1201 | volume = {55}, 1202 | pages = {703--708} 1203 | } 1204 | 1205 | @ARTICLE{NgPerron.1995, 1206 | author = {Serena Ng and Pierre Perron}, 1207 | title = {Unit root tests in ARMA models with data-dependent methods for the 1208 | selection of the truncation lag}, 1209 | journal = {Journal of American Statistical Association}, 1210 | year = {1995}, 1211 | volume = {90}, 1212 | pages = {268-281}, 1213 | owner = {Laurence}, 1214 | timestamp = {2011.06.09} 1215 | } 1216 | 1217 | @ARTICLE{OlleyPakes.1996, 1218 | author = {G. Steven Olley and Ariel Pakes}, 1219 | title = {The dynamics of productivity in the telecommunications equipment 1220 | industry}, 1221 | journal = {Econometrica}, 1222 | year = {1996}, 1223 | volume = {64}, 1224 | pages = {1263-1297}, 1225 | owner = {Laurence}, 1226 | timestamp = {2010.01.01} 1227 | } 1228 | 1229 | @BOOK{PaganUllah.1999, 1230 | title = {Nonparametric econometrics}, 1231 | publisher = {Cambridge University Press}, 1232 | year = {1999}, 1233 | author = {Adrian Pagan and Aman Ullah}, 1234 | owner = {Laurence}, 1235 | timestamp = {2010.01.01} 1236 | } 1237 | 1238 | @ARTICLE{Pakes.1986, 1239 | author = {Ariel Pakes}, 1240 | title = {Patents as options: some estimates of the value of holding European 1241 | patent stocks}, 1242 | journal = {Econometrica}, 1243 | year = {1986}, 1244 | volume = {54}, 1245 | pages = {755-784}, 1246 | owner = {Laurence}, 1247 | timestamp = {2010.01.01} 1248 | } 1249 | 1250 | @ARTICLE{Park.2012, 1251 | author = {Byoung G. Park}, 1252 | title = {Nonparametric {Identification} and {Estimation} of the {Extended} 1253 | {Roy} {Model}}, 1254 | journal = {Job Market Paper}, 1255 | year = {2012}, 1256 | owner = {laurencium}, 1257 | timestamp = {2013.11.14} 1258 | } 1259 | 1260 | @ARTICLE{Pesaran.2006, 1261 | author = {M. Hashem Pesaran}, 1262 | title = {Estimation and inference in large heterogeneous panels with a multifactor 1263 | error structure}, 1264 | journal = {Econometrica}, 1265 | year = {2006}, 1266 | volume = {74}, 1267 | pages = {967-1012}, 1268 | owner = {Laurence}, 1269 | timestamp = {2010.01.07} 1270 | } 1271 | 1272 | @ARTICLE{PhillipsPerron.1988, 1273 | author = {Peter C. B. Phillips and Pierre Perron}, 1274 | title = {Testing for a unit root in time series regression}, 1275 | journal = {Biometrika}, 1276 | year = {1988}, 1277 | volume = {75}, 1278 | pages = {335-346}, 1279 | owner = {Laurence}, 1280 | timestamp = {2011.06.09} 1281 | } 1282 | 1283 | @ARTICLE{DimitrisRomano.1994, 1284 | author = {Dimitris N. Politis and Joseph P. Romano}, 1285 | title = {Large sample confidence regions based on subsamples under minimal 1286 | assumptions}, 1287 | journal = {The Annals of Statistics}, 1288 | year = {1994}, 1289 | volume = {22}, 1290 | pages = {2031-2050}, 1291 | owner = {alumni}, 1292 | timestamp = {2010.05.13} 1293 | } 1294 | 1295 | @BOOK{PolitisRomanoWolf.1999, 1296 | title = {Subsampling}, 1297 | publisher = {Springer}, 1298 | year = {1999}, 1299 | author = {Dimitris N. Politis and Joseph P. 
Romano and Michael Wolf}, 1300 | address = {New York}, 1301 | owner = {laurencium}, 1302 | timestamp = {2013.10.18} 1303 | } 1304 | 1305 | @BOOK{Pollard.1984, 1306 | title = {Convergence of stochastic processes}, 1307 | publisher = {Springer-Verlag}, 1308 | year = {1984}, 1309 | author = {David Pollard}, 1310 | address = {New York}, 1311 | owner = {alumni}, 1312 | timestamp = {2010.04.30} 1313 | } 1314 | 1315 | @ARTICLE{Portnoy.1985, 1316 | author = {Stephen Portnoy}, 1317 | title = {Asymptotic behavior of $M$-estimators of $p$ regression parameters 1318 | when $p^2/n$ is large. II. Normal approximatoin}, 1319 | journal = {Annals of Statistics}, 1320 | year = {1985}, 1321 | volume = {13}, 1322 | pages = {1403-1417}, 1323 | owner = {Laurence}, 1324 | timestamp = {2011.03.16} 1325 | } 1326 | 1327 | @ARTICLE{Portnoy.1984, 1328 | author = {Stephen Portnoy}, 1329 | title = {Asymptotic behavior of $M$-estimators of $p$ regression parameters 1330 | when $p^2/n$ is large. I. Consistency}, 1331 | journal = {Annals of Statistics}, 1332 | year = {1984}, 1333 | volume = {12}, 1334 | pages = {1298-1309}, 1335 | owner = {Laurence}, 1336 | timestamp = {2011.03.16} 1337 | } 1338 | 1339 | @ARTICLE{Powell.1984, 1340 | author = {James L. Powell}, 1341 | title = {Least absolute deviations estimation for the censored regression 1342 | model}, 1343 | journal = {Journal of Econometrics}, 1344 | year = {1984}, 1345 | volume = {25}, 1346 | pages = {303-325}, 1347 | owner = {Laurence}, 1348 | timestamp = {2010.01.01} 1349 | } 1350 | 1351 | @ARTICLE{PrenticeGloeckler.1978, 1352 | author = {R. L. Prentice and L. A. Gloeckler}, 1353 | title = {Regression analysis of grouped survival data with application to 1354 | breast cancer data}, 1355 | journal = {Biometrics}, 1356 | year = {1978}, 1357 | volume = {34}, 1358 | pages = {57-67}, 1359 | owner = {Laurence}, 1360 | timestamp = {2010.02.22} 1361 | } 1362 | 1363 | @ARTICLE{QinLawless.1994, 1364 | author = {Jing Qin and Jerry Lawless}, 1365 | title = {Empirical likelihood and general estimating equations}, 1366 | journal = {The Annals of Statistics}, 1367 | year = {1994}, 1368 | volume = {22}, 1369 | pages = {300-325}, 1370 | owner = {Laurence}, 1371 | timestamp = {2010.01.01} 1372 | } 1373 | 1374 | @ARTICLE{Robinson.1988, 1375 | author = {Robinson, P. M.}, 1376 | title = {Root-N-consistent semiparametric regression}, 1377 | journal = {Econometrica}, 1378 | year = {1988}, 1379 | volume = {56}, 1380 | pages = {931-954}, 1381 | owner = {Laurence}, 1382 | timestamp = {2010.09.04} 1383 | } 1384 | 1385 | @ARTICLE{Robinson.1987, 1386 | author = {Peter M. Robinson}, 1387 | title = {Asymptotically efficient estimation in the presence of heteroskedasticity 1388 | of unknown form}, 1389 | journal = {Econometrica}, 1390 | year = {1987}, 1391 | volume = {55}, 1392 | pages = {875-891}, 1393 | owner = {Laurence}, 1394 | timestamp = {2010.01.01} 1395 | } 1396 | 1397 | @ARTICLE{RomanoShaikh.2008, 1398 | author = {Joseph P. Romano and Azeem M. Shaikh}, 1399 | title = {Inference for {Identifiable} {Parameters} in {Partially} {Identified} 1400 | {Econometric} {Models}}, 1401 | journal = {Journal of Statistical Planning and Inference}, 1402 | year = {2008}, 1403 | volume = {138}, 1404 | pages = {2786-2807}, 1405 | owner = {laurence}, 1406 | timestamp = {2012.11.01} 1407 | } 1408 | 1409 | @ARTICLE{Rosenbaum.1984, 1410 | author = {Paul R. 
Rosenbaum}, 1411 | title = {The consequences of adjustment for a concomitant variable that has 1412 | been affected by the treatment}, 1413 | journal = {Journal of the Royal Statistical Society, Series A}, 1414 | year = {1984}, 1415 | volume = {147}, 1416 | pages = {656-666}, 1417 | owner = {Laurence}, 1418 | timestamp = {2010.08.07} 1419 | } 1420 | 1421 | @ARTICLE{RosenbaumRubin.1983, 1422 | author = {Paul R. Rosenbaum and Donald B. Rubin}, 1423 | title = {The central role of the propensity score in observational studies 1424 | for causal effects}, 1425 | journal = {Biometrika}, 1426 | year = {1983}, 1427 | volume = {70}, 1428 | pages = {41-55}, 1429 | owner = {Laurence}, 1430 | timestamp = {2010.01.01} 1431 | } 1432 | 1433 | @ARTICLE{Rubin.1976, 1434 | author = {Donald B. Rubin}, 1435 | title = {Inference and missing data}, 1436 | journal = {Biometrika}, 1437 | year = {1976}, 1438 | volume = {63}, 1439 | pages = {581-592}, 1440 | owner = {Laurence}, 1441 | timestamp = {2010.02.08} 1442 | } 1443 | 1444 | @ARTICLE{Rubin.1974, 1445 | author = {Donald B. Rubin}, 1446 | title = {Estimating {Causal} {Effects} of {Treatments} in {Randomized} and 1447 | {Nonrandomized} {Studies}}, 1448 | journal = {Journal of Educational Psychology}, 1449 | year = {1974}, 1450 | volume = {66}, 1451 | pages = {688-701}, 1452 | owner = {Laurence}, 1453 | timestamp = {2010.03.03} 1454 | } 1455 | 1456 | @ARTICLE{Rust.1987, 1457 | author = {John Rust}, 1458 | title = {Optimal replacement of GMC bus engines: an empirical model of Harold 1459 | Zurcher}, 1460 | journal = {Econometrica}, 1461 | year = {1987}, 1462 | volume = {55}, 1463 | pages = {999-1033}, 1464 | owner = {Laurence}, 1465 | timestamp = {2010.01.01} 1466 | } 1467 | 1468 | @BOOK{Ruud.2000, 1469 | title = {An introduction to classical econometric theory}, 1470 | publisher = {Oxford University Press}, 1471 | year = {2000}, 1472 | author = {Paul A. Ruud}, 1473 | address = {Oxford}, 1474 | owner = {Laurence}, 1475 | timestamp = {2010.01.01} 1476 | } 1477 | 1478 | @ARTICLE{SaidDickey.1984, 1479 | author = {Said E. Said and David A. Dickey}, 1480 | title = {Testing for unit roots in autoregressive-moving average models of 1481 | unknown order}, 1482 | journal = {Biometrika}, 1483 | year = {1984}, 1484 | volume = {71}, 1485 | pages = {599-607}, 1486 | owner = {Laurence}, 1487 | timestamp = {2011.06.09} 1488 | } 1489 | 1490 | @ARTICLE{SansoNavarro.2011, 1491 | author = {Marcos Sanso-Navarro}, 1492 | title = {The effects on American foreign direct investment in the United Kingdom 1493 | from not adopting the Euro}, 1494 | journal = {Journal of Common Market Studies}, 1495 | year = {2011}, 1496 | volume = {49}, 1497 | pages = {463-483}, 1498 | owner = {Laurence}, 1499 | timestamp = {2011.02.21} 1500 | } 1501 | 1502 | @BOOK{Shiryaev.1995, 1503 | title = {Probability}, 1504 | publisher = {Springer}, 1505 | year = {1995}, 1506 | author = {A. N. Shiryaev}, 1507 | address = {New York}, 1508 | edition = {2nd}, 1509 | owner = {alumni}, 1510 | timestamp = {2010.04.30} 1511 | } 1512 | 1513 | @ARTICLE{Sommer.1986, 1514 | author = {Alfred Sommer and Edi Djunaedi and A. A. Loeden and Ignatius Tarwotjo 1515 | and Keith P. 
West and Robert Tilden and Lisa Mele}, 1516 | title = {Impact of {Vitamin} {A} {Supplementation} on {Childhood} {Mortality}: 1517 | {A} {Randomised} {Controlled} {Community} {Trial}}, 1518 | journal = {The Lancet}, 1519 | year = {1986}, 1520 | volume = {327}, 1521 | pages = {1169-1173}, 1522 | owner = {laurencium}, 1523 | timestamp = {2013.11.19} 1524 | } 1525 | 1526 | @ARTICLE{StaigerStock.1997, 1527 | author = {Douglas Staiger and James H. Stock}, 1528 | title = {Instrumental variables regression with weak instruments}, 1529 | journal = {Econometrica}, 1530 | year = {1997}, 1531 | volume = {65}, 1532 | pages = {557-586}, 1533 | owner = {Laurence}, 1534 | timestamp = {2010.01.01} 1535 | } 1536 | 1537 | @ARTICLE{StockWatson.2008, 1538 | author = {James H. Stock and Mark W. Watson}, 1539 | title = {Heteroskedasticity-robust standard errors for fixed effects panel 1540 | data regression}, 1541 | journal = {Econometrica}, 1542 | year = {2008}, 1543 | volume = {76}, 1544 | pages = {155174}, 1545 | owner = {Laurence}, 1546 | timestamp = {2010.01.07} 1547 | } 1548 | 1549 | @ARTICLE{StockWright.2000, 1550 | author = {James H. Stock and Jonathan H. Wright}, 1551 | title = {GMM with weak identification}, 1552 | journal = {Econometrica}, 1553 | year = {2000}, 1554 | volume = {68}, 1555 | pages = {1055-1096}, 1556 | owner = {Laurence}, 1557 | timestamp = {2010.01.01} 1558 | } 1559 | 1560 | @ARTICLE{Stute.1986a, 1561 | author = {Winfried Stute}, 1562 | title = {Conditional empirical processes}, 1563 | journal = {Annals of Statistics}, 1564 | year = {1986}, 1565 | volume = {14}, 1566 | pages = {638-647}, 1567 | owner = {Laurence}, 1568 | timestamp = {2011.05.07} 1569 | } 1570 | 1571 | @ARTICLE{Stute.1986b, 1572 | author = {Winfried Stute}, 1573 | title = {On almost sure convergence of conditional empirical distribution 1574 | functions}, 1575 | journal = {Annals of Probability}, 1576 | year = {1986}, 1577 | volume = {14}, 1578 | pages = {891-901}, 1579 | owner = {Laurence}, 1580 | timestamp = {2011.05.07} 1581 | } 1582 | 1583 | @ARTICLE{Tibshirani.1996, 1584 | author = {Robert Tibshirani}, 1585 | title = {Regression shrinkage and selection via the lasso}, 1586 | journal = {Journal of the Royal Statistical Society, Series B}, 1587 | year = {1996}, 1588 | volume = {58}, 1589 | pages = {267-288}, 1590 | owner = {Laurence}, 1591 | timestamp = {2011.03.16} 1592 | } 1593 | 1594 | @BOOK{Train.2009, 1595 | title = {Discrete choice methods with simulation}, 1596 | publisher = {Cambridge University Press}, 1597 | year = {2009}, 1598 | author = {Kenneth E. Train}, 1599 | edition = {2nd}, 1600 | owner = {Laurence}, 1601 | timestamp = {2010.01.04} 1602 | } 1603 | 1604 | @ARTICLE{Vytlacil.2002, 1605 | author = {Edward Vytlacil}, 1606 | title = {Independence, monotonicity, and latent index models: an equivalence 1607 | result}, 1608 | journal = {Econometrica}, 1609 | year = {2002}, 1610 | volume = {70}, 1611 | pages = {331-341}, 1612 | owner = {Laurence}, 1613 | timestamp = {2010.07.12} 1614 | } 1615 | 1616 | @ARTICLE{White.1980, 1617 | author = {White, Halbert}, 1618 | title = {A heteroskedasticity-consistent covariance matrix estimator and a 1619 | direct test for heteroskedasticity}, 1620 | journal = {Econometrica}, 1621 | year = {1980}, 1622 | volume = {48}, 1623 | pages = {817--838} 1624 | } 1625 | 1626 | @ARTICLE{WilliamsonDowns.1990, 1627 | author = {Robert C. 
Williamson and Tom Downs}, 1628 | title = {Probabilitistic {Arithmetic} {I}: {Numerical} {Methods} for {Calculating} 1629 | {Convolutions} and {Dependency} {Bounds}}, 1630 | journal = {International Journal of Approximate Reasoning}, 1631 | year = {1990}, 1632 | volume = {4}, 1633 | pages = {89-158}, 1634 | owner = {laurence}, 1635 | timestamp = {2012.10.29} 1636 | } 1637 | 1638 | @ARTICLE{Wooldridge.2007, 1639 | author = {Jeffrey M. Wooldridge}, 1640 | title = {Inverse probability weighted estimation for general missing data 1641 | problem}, 1642 | journal = {Journal of Econometrics}, 1643 | year = {2007}, 1644 | volume = {141}, 1645 | pages = {1281-1301}, 1646 | owner = {Laurence}, 1647 | timestamp = {2010.02.08} 1648 | } 1649 | 1650 | @ARTICLE{Wooldridge.2005, 1651 | author = {Jeffrey M. Wooldridge}, 1652 | title = {Fixed-effects and related estimators for correlated random-coefficient 1653 | and treatment-effect panel data models}, 1654 | journal = {Review of Economics and Statistics}, 1655 | year = {2005}, 1656 | volume = {87}, 1657 | pages = {385-390}, 1658 | owner = {Laurence}, 1659 | timestamp = {2010.08.05} 1660 | } 1661 | 1662 | @ARTICLE{Wooldridge.2003, 1663 | author = {Jeffrey M. Wooldridge}, 1664 | title = {Cluster-sample methods in applied econometrics}, 1665 | journal = {American Economic Review}, 1666 | year = {2003}, 1667 | volume = {93}, 1668 | pages = {133-138}, 1669 | owner = {Laurence}, 1670 | timestamp = {2010.01.21} 1671 | } 1672 | 1673 | @BOOK{Wooldridge.2002, 1674 | title = {Econometric analysis of cross section and panel data}, 1675 | publisher = {MIT Press}, 1676 | year = {2002}, 1677 | author = {Wooldridge, Jeffrey M.}, 1678 | address = {Cambridge} 1679 | } 1680 | 1681 | @ARTICLE{Wooldridge.2001, 1682 | author = {Jeffrey M. Wooldridge}, 1683 | title = {Asymptotic properties of weighted m-estimators for standard stratified 1684 | samples}, 1685 | journal = {Econometric Theory}, 1686 | year = {2001}, 1687 | volume = {17}, 1688 | pages = {451-470}, 1689 | owner = {Laurence}, 1690 | timestamp = {2010.01.21} 1691 | } 1692 | 1693 | @ARTICLE{Wooldridge.1999, 1694 | author = {Jeffrey M. 
Wooldridge}, 1695 | title = {Asymptotic properties of weighted M-estimators for variable probability 1696 | samples}, 1697 | journal = {Econometrica}, 1698 | year = {1999}, 1699 | volume = {67}, 1700 | pages = {1385-1406}, 1701 | owner = {Laurence}, 1702 | timestamp = {2010.01.21} 1703 | } 1704 | 1705 | @ARTICLE{YuanLin.2007, 1706 | author = {Ming Yuan and Yi Lin}, 1707 | title = {Model selection and estimation in the Gaussian graphical model}, 1708 | journal = {Biometrika}, 1709 | year = {2007}, 1710 | volume = {94}, 1711 | pages = {19-35}, 1712 | owner = {Laurence}, 1713 | timestamp = {2011.03.16} 1714 | } 1715 | 1716 | -------------------------------------------------------------------------------- /docs/tex/vignette.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laurencium/Causalinference/630e8fb195754a720da41791b725d3dadabfb257/docs/tex/vignette.pdf -------------------------------------------------------------------------------- /docs/tex/vignette.tex: -------------------------------------------------------------------------------- 1 | 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 5 | 6 | 7 | \documentclass[12pt]{article} 8 | 9 | 10 | \usepackage{amsmath, amsthm, amssymb, setspace, fullpage, apacite, enumitem, listings} 11 | \usepackage[margin=0.7in]{geometry} 12 | \usepackage[english]{babel} 13 | 14 | \renewcommand{\qedsymbol}{$\scriptstyle \blacksquare$} 15 | \renewcommand{\vec}[1]{\mbox{\boldmath$#1$}} 16 | \newcommand{\dto}{\overset{d}{\to}} 17 | \newcommand{\pto}{\overset{p}{\to}} 18 | \newcommand{\E}{\mathrm{E}} 19 | \newcommand{\F}{\mathfrak{F}} 20 | \newcommand{\I}{\mathrm{I}} 21 | \newcommand{\M}{\mathfrak{M}} 22 | \newcommand{\N}{\mathrm{N}} 23 | \newcommand{\diag}{\mathrm{diag}} 24 | \renewcommand{\P}{\mathrm{P}} 25 | \newcommand{\Q}{\mathrm{Q}} 26 | \newcommand{\Cov}{\mathrm{Cov}} 27 | \newcommand{\Var}{\mathrm{Var}} 28 | \newcommand{\betav}{\vec{\beta}} 29 | \newcommand{\betahat}{\hat{\vec{\beta}}} 30 | \newcommand{\argmax}{\operatornamewithlimits{argmax}} 31 | \newcommand{\argmin}{\operatornamewithlimits{argmin}} 32 | \newcommand{\plim}{\operatornamewithlimits{plim}} 33 | \newcommand{\interior}{\operatornamewithlimits{int}} 34 | 35 | \newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}} 36 | \def\independenT#1#2{\mathrel{\setbox0\hbox{$#1#2$}% 37 | \copy0\kern-\wd0\mkern4mu\box0}} % statistical independence symbol 38 | 39 | \newtheorem{thm}{Theorem}[section] 40 | \newtheorem{corollary}[thm]{Corollary} 41 | \newtheorem{lemma}[thm]{Lemma} 42 | \newtheorem{axiom}[thm]{Axiom} 43 | 44 | \theoremstyle{definition} 45 | \newtheorem{defn}[thm]{Definition} 46 | 47 | \theoremstyle{definition} 48 | \newtheorem{example}[thm]{Example} 49 | 50 | \theoremstyle{definition} 51 | \newtheorem{assumption}[thm]{Assumption} 52 | 53 | \theoremstyle{remark} 54 | \newtheorem{remark}[thm]{Remark} 55 | 56 | 57 | \onehalfspace 58 | %\setlength{\parskip}{1ex} 59 | 60 | 61 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 62 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | 64 | \begin{document} 65 | 66 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 67 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 68 | 69 | \title{Causal Inference in Python: A Vignette} 70 | 
\author{Laurence Wong} 71 | \maketitle 72 | 73 | This document illustrates the use of \textit{Causalinference} with a simple simulated data set. We begin with some basic definitions. 74 | 75 | \section{Setting and Notation} 76 | 77 | As is standard in the literature, we work within the framework of Rubin's potential outcome model \cite{Rubin.1974}. 78 | 79 | Let $Y(0)$ denote the potential outcome of a subject in the absence of treatment, and let $Y(1)$ denote the unit's potential outcome when it is treated. Let $D$ denote treatment status, with $D=1$ indicating treatment and $D=0$ indicating control, and let $X$ be a $K$-column vector of covariates or individual characteristics. 80 | 81 | For unit $i$, $i=1,2,\ldots,N$, the observed outcome can be written as 82 | \[Y_i = (1-D_i) Y_i(0) + D_i Y_i(1).\] 83 | The set of observables $(Y_i, D_i, X_i)$, $i=1,2,\ldots,N$, forms the basic input data set for \textit{Causalinference}. 84 | 85 | \textit{Causalinference} is appropriate for settings in which treatment can be said to be \textit{strongly ignorable}, as defined in Rosenbaum and Rubin \citeyear{RosenbaumRubin.1983}. That is, for all $x$ in the support of $X$, we have 86 | \begin{itemize} 87 | \item[(i)] Unconfoundedness: $D$ is independent of $\big(Y(0), Y(1)\big)$ conditional on $X=x$; 88 | \item[(ii)] Overlap: $c < \P(D=1|X=x) < 1-c$, for some $c>0$. 89 | \end{itemize} 90 | 91 | In the following, we illustrate the typical flow of a causal analysis using the tools of \textit{Causalinference} and a simulated data set. In simulating the data, we specified a constant treatment effect of 10 for simplicity, and incorporated systematic overlap issues and nonlinearities to highlight a number of tools in the package. We focus mostly on illustrating the use of \textit{Causalinference}; for details on methodology please refer to Imbens and Rubin \citeyear{ImbensRubin.2015}. 92 | 93 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 94 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 95 | 96 | \section{Initialization} \label{sec.a} 97 | 98 | The main object of interest in \textit{Causalinference} is the class \texttt{CausalModel}, which we can import with 99 | \begin{verbatim} 100 | >>> from causalinference import CausalModel 101 | \end{verbatim} 102 | \texttt{CausalModel} takes as inputs three NumPy arrays: \texttt{Y}, an $N$-vector of observed outcomes; \texttt{D}, an $N$-vector of treatment status indicators; and \texttt{X}, an $N$-by-$K$ matrix of covariates. To initialize a \texttt{CausalModel} instance, simply run: 103 | \begin{verbatim} 104 | >>> causal = CausalModel(Y, D, X) 105 | \end{verbatim} 106 | 107 | Once an instance of the class \texttt{CausalModel} has been created, it will contain a number of attributes and methods that are relevant for conducting causal analyses. Tables \ref{tab.a} and \ref{tab.b} contain a brief description of these attributes and methods. 108 | 109 | \texttt{CausalModel} is \textit{stateful}. As we employ some of the methods to be discussed subsequently, the instance \texttt{causal} will mutate, with new data being added or existing data being modified or dropped. Running 110 | \begin{verbatim} 111 | >>> causal.reset() 112 | \end{verbatim} 113 | will return \texttt{causal} to its initial state. 
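To make the rest of this vignette easy to follow along with, the sketch below shows one way input arrays of this form might be simulated and passed to \texttt{CausalModel}. The data-generating process here is purely illustrative and is not the exact simulation used to produce the output reported below; it merely builds in a constant treatment effect of 10, a nonlinear outcome equation, and overlap problems of the kind discussed in the text.
\begin{verbatim}
>>> import numpy as np
>>> from causalinference import CausalModel
>>> np.random.seed(0)
>>> N, K = 1000, 2
>>> X = np.random.uniform(0, 10, size=(N, K))        # N-by-K covariate matrix
>>> p = 1/(1 + np.exp(2.5 - 0.5*X.sum(axis=1)))      # true propensity score
>>> D = (np.random.uniform(size=N) < p)*1            # treatment status indicators
>>> Y = 10*D + 0.5*(X**2).sum(axis=1) + np.random.normal(size=N)
>>> causal = CausalModel(Y, D, X)
\end{verbatim}
Any arrays of the appropriate shapes will work equally well; the only requirement is that \texttt{Y}, \texttt{D}, and \texttt{X} line up row by row.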
114 | 115 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 116 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 117 | 118 | \section{Summary Statistics} \label{sec.b} 119 | 120 | Once \texttt{CausalModel} has been instantiated, basic summary statistics will be computed and stored in the attribute \texttt{summary\_stats}. We can display it by running: 121 | \begin{verbatim} 122 | >>> print(causal.summary_stats) 123 | \end{verbatim} 124 | \begin{verbatim} 125 | Summary Statistics 126 | 127 | Controls (N_c=392) Treated (N_t=608) 128 | Variable Mean S.d. Mean S.d. Raw-diff 129 | -------------------------------------------------------------------------------- 130 | Y 43.097 31.353 90.911 41.815 47.814 131 | 132 | Controls (N_c=392) Treated (N_t=608) 133 | Variable Mean S.d. Mean S.d. Nor-diff 134 | -------------------------------------------------------------------------------- 135 | X0 3.810 2.950 5.762 2.566 0.706 136 | X1 3.436 2.848 5.849 2.634 0.880 137 | \end{verbatim} 138 | 139 | The attribute \texttt{summary\_stats} is in reality just a dictionary-like object with special method defined to enable the display of the above table. In many situations it is more convenient to simply access the relevant statistic directly. To retrieve the vector of covariate means for the treatment group, for example, we simply run: 140 | \begin{verbatim} 141 | >>> causal.summary_stats['X_t_mean'] 142 | array([ 5.76232357, 5.8489734 ]) 143 | \end{verbatim} 144 | 145 | Since \texttt{summary\_stats} behaves like a dictionary, it is equipped with the usual Python dictionary methods. To list the dictionary keys, for instance, we go: 146 | \begin{verbatim} 147 | >>> causal.summary_stats.keys() 148 | ['Y_c_mean', 'X_t_sd', 'N_t', 'K', 'ndiff', 'N', 'Y_t_sd', 'rdiff', 'Y_t_mean', 149 | 'X_c_mean', 'X_t_mean', 'Y_c_sd', 'X_c_sd', 'N_c'] 150 | \end{verbatim} 151 | 152 | Here \texttt{rdiff} refers to the difference in average observed outcomes between treatment and control groups. \texttt{ndiff}, on the other hand, refers to the normalized differences in average covariates, defined as 153 | \[\frac{\bar{X}_{k,t} - \bar{X}_{k,c}}{\sqrt{\left(s^2_{k,t}+s^2_{k,c}\right)\Big/ 2}},\] 154 | where $\bar{X}_{k,t}$ and $s_{k,t}$ are the sample mean and sample standard deviation of the $k$th covariate of the treatment group, and $\bar{X}_{k,c}$ and $s_{k,c}$ are the analogous statistics for the control group. 155 | 156 | The normalized differences in average covariates provide a way to measure the covariate balance between the treatment and the control groups. Unlike the t-statistic, its absolute magnitude does not increase (in expectation) as the sample size increases. 157 | 158 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 159 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 160 | 161 | \section{Least Squares Estimation} 162 | 163 | One of the simplest treatment effect estimators is the ordinary least squares (OLS) estimator. \textit{Causalinference} provides several common regression specifications. 
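Before turning to these, note that the normalized differences reported above can be recomputed by hand from \texttt{summary\_stats} using only the documented dictionary keys, which makes the definition concrete. A minimal sketch, assuming the \texttt{causal} instance created earlier:
\begin{verbatim}
>>> import numpy as np
>>> s = causal.summary_stats
>>> ndiff = (s['X_t_mean'] - s['X_c_mean']) / \
...         np.sqrt((s['X_t_sd']**2 + s['X_c_sd']**2)/2)
\end{verbatim}
The result should agree with \texttt{s['ndiff']} and with the Nor-diff column of the table above.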
164 | 165 | By default, the method \texttt{est\_via\_ols} will run the following regression: 166 | \[Y_i = \alpha + \beta D_i + \gamma' (X_i-\bar{X}) + \delta' D_i (X_i-\bar{X}) + \varepsilon_i.\] 167 | 168 | To inspect any treatment effect estimates produced, we can simply invoke \texttt{print} on the attribute \texttt{estimates}, as follows: 169 | \begin{verbatim} 170 | >>> causal.est_via_ols() 171 | >>> print(causal.estimates) 172 | \end{verbatim} 173 | \begin{verbatim} 174 | Treatment Effect Estimates: OLS 175 | 176 | Est. S.e. z P>|z| [95% Conf. int.] 177 | -------------------------------------------------------------------------------- 178 | ATE 3.672 0.906 4.051 0.000 1.895 5.449 179 | ATC -0.227 0.930 -0.244 0.807 -2.050 1.596 180 | ATT 6.186 1.067 5.799 0.000 4.095 8.277 181 | \end{verbatim} 182 | Here ATE, ATC, and ATT stand for, respectively, average treatment effect, average treatment effect for the controls, and average treatment effect for the treated. Like \texttt{summary\_stats}, the attribute \texttt{estimates} is a dictionary-like object that contains the estimation results. 183 | 184 | Including interaction terms between the treatment indicator $D$ and covariates $X$ implies that treatment effects can differ across individuals. In some instances we may want to assume a constant treatment effect, and only run 185 | \[Y_i = \alpha + \beta D_i + \gamma' (X_i-\bar{X}) + \varepsilon_i.\] 186 | This can be achieved by supplying the value 1 to the optional parameter \texttt{adj} of \texttt{est\_via\_ols} (its default value is 2). To compute the raw difference in average outcomes between treatment and control groups, we can set \texttt{adj=0}. 187 | 188 | In this example, the least squares estimates are radically different from the true treatment effect of 10. This is the result of the nonlinearity and non-overlap issues intentionally introduced into the data simulation process. As we shall see, several other tools exist in \textit{Causalinference} that can better deal with a lack of overlap and that will allow us to obtain estimates that are less sensitive to functional form assumptions. 189 | 190 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 191 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 192 | 193 | \section{Propensity Score Estimation} \label{sec.c} 194 | 195 | The probability of receiving treatment conditional on the covariates, $p(X_i) = \P(D_i=1|X_i)$, also known as the propensity score, plays a central role in much of what follows. Two methods, \texttt{est\_propensity} and \texttt{est\_propensity\_s}, are provided for propensity score estimation. Both involve running a logistic regression of the treatment indicator $D$ on functions of the covariates. \texttt{est\_propensity} allows the user to specify the covariates to include linearly and/or quadratically, while \texttt{est\_propensity\_s} will make this choice automatically based on a sequence of likelihood ratio tests. 196 | 197 | In the following, we run \texttt{est\_propensity\_s} and display the estimation results. In this example, the specification selection algorithm decided to include both covariates and all the interaction and quadratic terms. 198 | 199 | \begin{verbatim} 200 | >>> causal.est_propensity_s() 201 | >>> print(causal.propensity) 202 | \end{verbatim} 203 | \begin{verbatim} 204 | Estimated Parameters of Propensity Score 205 | 206 | Coef. S.e. z P>|z| [95% Conf. int.]
207 | -------------------------------------------------------------------------------- 208 | Intercept -2.839 0.526 -5.401 0.000 -3.870 -1.809 209 | X1 0.486 0.153 3.178 0.001 0.186 0.786 210 | X0 0.466 0.155 3.011 0.003 0.163 0.770 211 | X1*X0 0.080 0.015 5.391 0.000 0.051 0.109 212 | X0*X0 -0.045 0.012 -3.579 0.000 -0.069 -0.020 213 | X1*X1 -0.045 0.013 -3.542 0.000 -0.070 -0.020 214 | \end{verbatim} 215 | 216 | The \texttt{propensity} attribute is again another dictionary-like container of results. The dictionary keys of \texttt{propensity} can be found by running: 217 | \begin{verbatim} 218 | >>> causal.propensity.keys() 219 | ['coef', 'lin', 'qua', 'loglike', 'fitted', 'se'] 220 | \end{verbatim} 221 | The selected linear and quadratic terms are contained in the lists \texttt{causal.propensity['lin']} and \texttt{causal.propensity['qua']}. Though we won't make direct calls to it, most of the propensity-based techniques discussed subsequently are based on \texttt{causal.propensity['fitted']}, the vector of estimated propensity scores. 222 | 223 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 224 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 225 | 226 | \section{Improving Covariate Balance} \label{sec.d} 227 | 228 | When there is indication of covariate imbalance, we may wish to construct a sample where the treatment and control groups are more similar than the original full sample. One way of doing so is by dropping units with extreme values of propensity score. For these subjects, their covariate values are such that the probability of being in the treatment (or control) group is so overwhelmingly high that we cannot reliably find comparable units in the opposite group. We may wish to forego estimating treatment effects for such units since nothing much can be credibly said about them. 229 | 230 | A good rule-of-thumb is to drop units whose estimated propensity score is less than $\alpha=0.1$ or greater than $1-\alpha=0.9$. By default, once the propensity score has been estimated by running either \texttt{est\_propensity} or \texttt{est\_propensity\_s}, a value of 0.1 will be set for the attribute \texttt{cutoff}: 231 | 232 | \begin{verbatim} 233 | >>> causal.cutoff 234 | 0.1 235 | \end{verbatim} 236 | 237 | Calling \texttt{causal.trim()} at this point will drop every unit that has propensity score outside of the $[\alpha, 1-\alpha]$ interval. Alternatively, a procedure exists that will estimate the optimal cutoff that minimizes the asymptotic sampling variance of the trimmed sample. The method \texttt{trim\_s} will perform this calculation, set the \texttt{cutoff} to the optimal $\alpha$, and then invoke \texttt{trim} to construct the subsample. For our example, the optimal $\alpha$ was estimated to be slightly less than 0.1: 238 | \begin{verbatim} 239 | >>> causal.trim_s() 240 | >>> causal.cutoff 241 | 0.0954928016329 242 | \end{verbatim} 243 | The complexity of this cutoff selection algorithm is only $O(N \log N)$, so in practice there is very little reason to not employ it. 244 | 245 | If we now print \texttt{summary\_stats} again to view the summary statistics of the trimmed sample, we see that the normalized differences in average covariates has fallen noticeably. 246 | \begin{verbatim} 247 | >>> print(causal.summary_stats) 248 | \end{verbatim} 249 | \begin{verbatim} 250 | Summary Statistics 251 | 252 | Controls (N_c=371) Treated (N_t=363) 253 | Variable Mean S.d. Mean S.d. 
Raw-diff 254 | -------------------------------------------------------------------------------- 255 | Y 41.331 29.608 66.067 28.108 24.736 256 | 257 | Controls (N_c=371) Treated (N_t=363) 258 | Variable Mean S.d. Mean S.d. Nor-diff 259 | -------------------------------------------------------------------------------- 260 | X0 3.709 2.872 4.658 2.522 0.351 261 | X1 3.407 2.784 4.661 2.517 0.472 262 | \end{verbatim} 263 | 264 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 265 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 266 | 267 | \section{Stratifying the Sample} \label{sec.e} 268 | 269 | With the propensity score estimated, one may wish to stratify the sample into blocks that have units that are more similar in terms of their covariates. This makes the treatment and control groups within each propensity bin more comparable, and therefore treatment effect estimates more credible. 270 | 271 | \textit{Causalinference} provides two methods for subclassification based on propensity score. The first, \texttt{stratify}, splits the sample based on what is specified in the attribute \texttt{blocks}. The default value of \texttt{blocks} is set to 5, which means that \texttt{stratify} will split the sample into 5 equal-sized bins. In contrast, the second method, \texttt{stratify\_s}, will use a data-driven procedure for selecting both the number of blocks and their boundaries, with the expectation that the number of blocks should increase with the sample size. Operationally this method is a divide-and-conquer algorithm that recursively divides the sample into two until there is no significant advantage of doing so. This algorithm also runs in $O(N \log N)$ time, so costs relatively little to use. 272 | 273 | To inspect the results of the stratification, we can invoke \texttt{print} on the attribute \texttt{strata} to display some summary statistics, as follows: 274 | \begin{verbatim} 275 | >>> causal.stratify_s() 276 | >>> print(causal.strata) 277 | \end{verbatim} 278 | \begin{verbatim} 279 | Stratification Summary 280 | 281 | Propensity Score Sample Size Ave. Propensity Outcome 282 | Stratum Min. Max. Controls Treated Controls Treated Raw-diff 283 | -------------------------------------------------------------------------------- 284 | 1 0.095 0.265 157 28 0.188 0.187 11.885 285 | 2 0.266 0.474 111 72 0.360 0.367 12.025 286 | 3 0.477 0.728 70 113 0.598 0.601 11.696 287 | 4 0.728 0.836 23 69 0.781 0.787 10.510 288 | 5 0.838 0.904 10 81 0.865 0.873 3.405 289 | \end{verbatim} 290 | 291 | Under the hood, the attribute \texttt{strata} is actually a list-like object that contains, as each of its elements, a full instance of the class \texttt{CausalModel}, with the input data being those that correspond to the units that are in the propensity bin. We can thus, for example, access each stratum and inspect its \texttt{summary\_stats} attribute, or as the following illustrates, loop through \texttt{strata} and estimate within-bin treatment effects using least squares. 292 | \begin{verbatim} 293 | >>> for stratum in causal.strata: 294 | ... stratum.est_via_ols(adj=1) 295 | ... 
296 | >>> [stratum.estimates['ols']['ate'] for stratum in causal.strata] 297 | [10.379170390195197, 9.2918973715823707, 9.67876709257445, 9.6722830043583023, 298 | 9.2239596078238222] 299 | \end{verbatim} 300 | 301 | Note that these estimates are much more stable and closer to the true value of 10 than the within-bin raw differences in average outcomes that were reported in the stratification summary table, highlighting the virtue of further controlling for covariates even within blocks. 302 | 303 | Taking the sample-weighted average of the above within-bin least squares estimates results in a propensity score-based estimator that is commonly known as the subclassification estimator or blocking estimator. However, instead of manually looping through the \texttt{strata} attribute, estimating within-bin treatment effects, and then averaging appropriately to arrive at an overall estimate, we can also simply call \texttt{est\_via\_blocking}, which will perform these operations and collect the results in the attribute \texttt{estimates}. We will report these estimates in the next section along with estimates obtained from the alternative estimators. 304 | 305 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 306 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 307 | 308 | \section{Treatment Effect Estimation} \label{sec.f} 309 | 310 | In addition to least squares and the blocking estimator described in the last section, \textit{Causalinference} provides two alternative treatment effect estimators. The first is the nearest neighbor matching estimator of Abadie and Imbens \citeyear{AbadieImbens.2006}. Instead of relying on the propensity score, this estimator pairs treatment and control units by matching directly on the covariate vectors themselves. More specifically, each unit $i$ in the sample is matched with a unit $m(i)$ in the opposite group, where 311 | \[m(i) = \argmin_{j: D_j \neq D_i} \|X_j - X_i\|,\] 312 | and $\|X_j - X_i\|$ is some measure of distance between the covariate vectors $X_j$ and $X_i$. The method \texttt{est\_via\_matching} implements this estimator, as well as several extensions that can be invoked through optional arguments. 313 | 314 | The last estimator is a version of the Horvitz-Thompson weighting estimator, modified to further adjust for covariates. Mechanically, this involves running the following weighted least squares regression: 315 | \[Y_i = \alpha + \beta D_i + \gamma' X_i + \varepsilon_i,\] 316 | where the weight for unit $i$ is $1/\hat{p}(X_i)$ if $i$ is in the treatment group, and $1/\big(1-\hat{p}(X_i)\big)$ if $i$ is in the control group. This estimator is also sometimes called the doubly-robust estimator, referring to the fact that it is consistent if either the propensity score or the regression function is correctly specified. We can invoke it by calling \texttt{est\_via\_weighting}. Note that under this specification the treatment effect does not differ across units, so the ATC and the ATT are both equal to the overall ATE. 317 | 318 | In the following we invoke each of the four estimators (including least squares, since the input data has changed now that the sample has been trimmed), and print out the resulting estimates.
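Before doing so, it is worth sketching what the blocking point estimate amounts to. The snippet below aggregates the within-bin least squares estimates from the previous section by hand, as a sample-weighted average; it assumes that each stratum's sample size can be read off its internal \texttt{raw\_data['N']} entry, so treat it as an illustration rather than as part of the documented interface:
\begin{verbatim}
>>> ates = [stratum.estimates['ols']['ate'] for stratum in causal.strata]
>>> sizes = [stratum.raw_data['N'] for stratum in causal.strata]
>>> ate_by_hand = sum(N*ate for N, ate in zip(sizes, ates)) / sum(sizes)
\end{verbatim}
The value of \texttt{ate\_by\_hand} should agree closely with the blocking ATE reported below; \texttt{est\_via\_blocking} performs this aggregation (and the analogous ones for the ATC and ATT) while also combining the within-bin standard errors.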
319 | \begin{verbatim} 320 | >>> causal.est_via_ols() 321 | >>> causal.est_via_weighting() 322 | >>> causal.est_via_blocking() 323 | >>> causal.est_via_matching(bias_adj=True) 324 | >>> print(causal.estimates) 325 | \end{verbatim} 326 | \begin{verbatim} 327 | Treatment Effect Estimates: OLS 328 | 329 | Est. S.e. z P>|z| [95% Conf. int.] 330 | -------------------------------------------------------------------------------- 331 | ATE 2.913 0.803 3.627 0.000 1.339 4.487 332 | ATC 2.435 0.824 2.956 0.003 0.820 4.049 333 | ATT 3.401 0.885 3.843 0.000 1.667 5.136 334 | \end{verbatim} 335 | \begin{verbatim} 336 | Treatment Effect Estimates: Weighting 337 | 338 | Est. S.e. z P>|z| [95% Conf. int.] 339 | -------------------------------------------------------------------------------- 340 | ATE 17.821 1.684 10.585 0.000 14.521 21.121 341 | \end{verbatim} 342 | \begin{verbatim} 343 | Treatment Effect Estimates: Blocking 344 | 345 | Est. S.e. z P>|z| [95% Conf. int.] 346 | -------------------------------------------------------------------------------- 347 | ATE 9.702 0.381 25.444 0.000 8.954 10.449 348 | ATC 9.847 0.527 18.701 0.000 8.815 10.879 349 | ATT 9.553 0.332 28.771 0.000 8.903 10.204 350 | \end{verbatim} 351 | \begin{verbatim} 352 | Treatment Effect Estimates: Matching 353 | 354 | Est. S.e. z P>|z| [95% Conf. int.] 355 | -------------------------------------------------------------------------------- 356 | ATE 9.624 0.245 39.354 0.000 9.145 10.103 357 | ATC 9.642 0.270 35.776 0.000 9.114 10.170 358 | ATT 9.606 0.318 30.159 0.000 8.981 10.230 359 | \end{verbatim} 360 | 361 | As we can see above, despite the trimming, the least squares estimates are still severely biased, as is the weighting estimate (since neither the propensity score nor the regression function is correctly specified). The blocking and matching estimators, on the other hand, are less sensitive to specification assumptions, and thus result in estimates that are closer to the true average treatment effects. 362 | 363 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 364 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 365 | 366 | \bibliographystyle{apacite} 367 | \bibliography{references} 368 | 369 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 370 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 371 | 372 | \begin{table}[ht] 373 | \begin{center}\begin{tabular}{ll} 374 | Attribute & Description \\ 375 | \texttt{summary\_stats} & Dictionary-like object containing summary statistics for the \\ 376 | & outcome variable and the covariates. \\ 377 | \texttt{propensity} & Dictionary-like object containing propensity score data, \\ 378 | & including estimated logistic regression coefficients, predicted \\ 379 | & propensity score, maximized log-likelihood, and the lists of the \\ 380 | & linear and quadratic terms that are included in the regression. \\ 381 | \texttt{cutoff} & Floating point number specifying the cutoff point for trimming \\ 382 | & on propensity score.\\ 383 | \texttt{blocks} & Either an integer indicating the number of equal-sized blocks to \\ 384 | & stratify the sample into, or a list of ascending numbers specifying \\ 385 | & the boundaries of each stratum. \\ 386 | \texttt{strata} & List-like object containing the list of stratified propensity bins. \\ 387 | \texttt{estimates} & Dictionary-like object containing treatment effect estimates for \\ 388 | & each estimator used.
389 | \end{tabular}\end{center} 390 | \caption{Attributes of the class \texttt{CausalModel}. Invoking \texttt{print} on any of the dictionary- or list-like attributes above yields a customized summary table.} \label{tab.a} 391 | \end{table} 392 | 393 | \begin{table}[ht] 394 | \begin{center}\begin{tabular}{ll} 395 | Method & Description \\ 396 | \texttt{reset} & Reinitializes data to the original inputs and drops any estimated results. \\ 397 | \texttt{est\_propensity} & Estimates the propensity score via logistic regression using the specified \\ 398 | & linear and quadratic terms. \\ 399 | \texttt{est\_propensity\_s} & Estimates the propensity score via logistic regression using the \\ 400 | & covariate selection algorithm of Imbens and Rubin \citeyear{ImbensRubin.2015}. \\ 401 | \texttt{trim} & Trims data based on propensity score using the threshold specified \\ 402 | & by the attribute \texttt{cutoff}. \\ 403 | \texttt{trim\_s} & Trims data based on propensity score using the cutoff selected by \\ 404 | & the procedure of Crump, Hotz, Imbens, and Mitnik \citeyear{CrumpHotzImbensMitnik.2009}. \\ 405 | \texttt{stratify} & Stratifies the sample based on propensity score as specified by \\ 406 | & the attribute \texttt{blocks}. \\ 407 | \texttt{stratify\_s} & Stratifies the sample based on propensity score using the bin \\ 408 | & selection procedure suggested by Imbens and Rubin \citeyear{ImbensRubin.2015}. \\ 409 | \texttt{est\_via\_ols} & Estimates average treatment effects using least squares. \\ 410 | \texttt{est\_via\_weighting} & Estimates average treatment effects using the Horvitz-Thompson \\ 411 | & weighting estimator modified to incorporate covariates. \\ 412 | \texttt{est\_via\_blocking} & Estimates average treatment effects using regression within blocks. \\ 413 | \texttt{est\_via\_matching} & Estimates average treatment effects using matching with replacement. 414 | \end{tabular}\end{center} 415 | \caption{Methods of the class \texttt{CausalModel}.
Invoke \texttt{help} on any of the above methods for more detailed documentation.} \label{tab.b} 416 | \end{table} 417 | 418 | \clearpage 419 | 420 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 421 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 422 | 423 | \end{document} 424 | 425 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup 3 | except ImportError: 4 | from distutils.core import setup 5 | 6 | config = { 7 | 'name': 'CausalInference', 8 | 'version': '0.1.3', 9 | 'url': 'https://github.com/laurencium/causalinference', 10 | 'author': 'Laurence Wong', 11 | 'author_email': 'laurencium@gmail.com', 12 | 'packages': ['causalinference', 'causalinference.core', 13 | 'causalinference.estimators', 'causalinference.utils'], 14 | 'include_package_data': True, 15 | 'package_data': {'causalinference': ['utils/*.txt']}, 16 | 'license': 'LICENSE.txt', 17 | 'description': 'Causal Inference in Python', 18 | 'long_description': open('README.rst').read(), 19 | } 20 | 21 | setup(**config) 22 | 23 | -------------------------------------------------------------------------------- /tests/test_blocking.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.estimators.blocking as b 5 | import causalinference.causal as c 6 | 7 | 8 | def test_calc_atx(): 9 | 10 | atxs = [0.5, 3.2, -9.4] 11 | Ns = [5, 13, 7] 12 | ans = -0.868 13 | 14 | assert np.allclose(b.calc_atx(atxs, Ns), ans) 15 | 16 | 17 | def test_atx_se(): 18 | 19 | atx_ses = [0.3, 1.3, 0.8] 20 | Ns = [3, 8, 4] 21 | ans = 0.72788888 22 | 23 | assert np.allclose(b.calc_atx_se(atx_ses, Ns), ans) 24 | 25 | 26 | def test_blocking(): 27 | 28 | Y1 = np.array([52, 30, 5, 29, 12, 10, 44, 87]) 29 | D1 = np.array([0, 0, 0, 0, 1, 1, 1, 1]) 30 | X1 = np.array([[1, 42], [3, 32], [9, 7], [12, 86], 31 | [5, 94], [4, 36], [2, 13], [6, 61]]) 32 | causal1 = c.CausalModel(Y1, D1, X1) 33 | Y2 = np.array([16, 4, 10, 6, 9, 11]) 34 | D2 = np.array([0, 0, 0, 1, 1, 1]) 35 | X2 = np.array([[1], [3], [3], [1], [7], [2]]) 36 | causal2 = c.CausalModel(Y2, D2, X2) 37 | strata = [causal1, causal2] 38 | 39 | adj1 = 0 40 | blocking1 = b.Blocking(strata, adj1) 41 | ate1 = 4.714286 42 | atc1 = 4.714286 43 | att1 = 4.714286 44 | ate_se1 = 10.18945 45 | atc_se1 = 10.18945 46 | att_se1 = 10.18945 47 | assert np.allclose(blocking1['ate'], ate1) 48 | assert np.allclose(blocking1['atc'], atc1) 49 | assert np.allclose(blocking1['att'], att1) 50 | assert np.allclose(blocking1['ate_se'], ate_se1) 51 | assert np.allclose(blocking1['atc_se'], atc_se1) 52 | assert np.allclose(blocking1['att_se'], att_se1) 53 | 54 | adj2 = 1 55 | blocking2 = b.Blocking(strata, adj2) 56 | ate2 = 1.657703 57 | atc2 = 1.657703 58 | att2 = 1.657703 59 | ate_se2 = 10.22921 60 | atc_se2 = 10.22921 61 | att_se2 = 10.22921 62 | assert np.allclose(blocking2['ate'], ate2) 63 | assert np.allclose(blocking2['atc'], atc2) 64 | assert np.allclose(blocking2['att'], att2) 65 | assert np.allclose(blocking2['ate_se'], ate_se2) 66 | assert np.allclose(blocking2['atc_se'], atc_se2) 67 | assert np.allclose(blocking2['att_se'], att_se2) 68 | 69 | adj3 = 2 70 | blocking3 = b.Blocking(strata, adj3) 71 | ate3 = 17.83044057 72 | atc3 = 35.45842407 73 | att3 = 0.20250793 74 | ate_se3 = 11.42591 75 | atc_se3 = 
17.11964 76 | att_se3 = 6.87632 77 | assert np.allclose(blocking3['ate'], ate3) 78 | assert np.allclose(blocking3['atc'], atc3) 79 | assert np.allclose(blocking3['att'], att3) 80 | assert np.allclose(blocking3['ate_se'], ate_se3) 81 | assert np.allclose(blocking3['atc_se'], atc_se3) 82 | assert np.allclose(blocking3['att_se'], att_se3) 83 | 84 | -------------------------------------------------------------------------------- /tests/test_causal.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from nose.tools import * 3 | import numpy as np 4 | 5 | import causalinference.causal as c 6 | from utils import random_data 7 | 8 | 9 | def test_est_propensity(): 10 | 11 | D = np.array([0, 0, 0, 1, 1, 1]) 12 | X = np.array([[7, 8], [3, 10], [7, 10], [4, 7], [5, 10], [9, 8]]) 13 | Y = random_data(D_cur=D, X_cur=X) 14 | causal = c.CausalModel(Y, D, X) 15 | 16 | causal.est_propensity() 17 | lin = [0, 1] 18 | qua = [] 19 | coef = np.array([6.8066090, -0.0244874, -0.7524939]) 20 | loglike = -3.626517 21 | fitted = np.array([0.6491366, 0.3117840, 0.2911631, 22 | 0.8086407, 0.3013733, 0.6379023]) 23 | se = np.array([8.5373779, 0.4595191, 0.8106499]) 24 | keys = {'lin', 'qua', 'coef', 'loglike', 'fitted', 'se'} 25 | 26 | assert_equal(causal.propensity['lin'], lin) 27 | assert_equal(causal.propensity['qua'], qua) 28 | assert np.allclose(causal.propensity['coef'], coef) 29 | assert np.allclose(causal.propensity['loglike'], loglike) 30 | assert np.allclose(causal.propensity['fitted'], fitted) 31 | assert np.allclose(causal.propensity['se'], se) 32 | assert_equal(set(causal.propensity.keys()), keys) 33 | assert np.allclose(causal.raw_data['pscore'], fitted) 34 | 35 | 36 | def test_est_propensity_s(): 37 | 38 | D = np.array([0, 0, 0, 1, 1, 1]) 39 | X = np.array([[7, 8], [3, 10], [7, 10], [4, 7], [5, 10], [9, 8]]) 40 | Y = random_data(D_cur=D, X_cur=X) 41 | causal = c.CausalModel(Y, D, X) 42 | 43 | causal.est_propensity_s() 44 | lin1 = [1] 45 | qua1 = [] 46 | coef1 = np.array([6.5424027, -0.7392041]) 47 | loglike1 = -3.627939 48 | fitted1 = np.array([0.6522105, 0.2995088, 0.2995088, 49 | 0.7970526, 0.2995088, 0.6522105]) 50 | se1 = np.array([6.8455179, 0.7641445]) 51 | keys = {'lin', 'qua', 'coef', 'loglike', 'fitted', 'se'} 52 | 53 | assert_equal(causal.propensity['lin'], lin1) 54 | assert_equal(causal.propensity['qua'], qua1) 55 | assert np.allclose(causal.propensity['coef'], coef1) 56 | assert np.allclose(causal.propensity['loglike'], loglike1) 57 | assert np.allclose(causal.propensity['fitted'], fitted1) 58 | assert np.allclose(causal.propensity['se'], se1) 59 | assert_equal(set(causal.propensity.keys()), keys) 60 | assert np.allclose(causal.raw_data['pscore'], fitted1) 61 | 62 | causal.est_propensity_s([0,1]) 63 | lin2 = [0, 1] 64 | qua2 = [] 65 | coef2 = np.array([6.8066090, -0.0244874, -0.7524939]) 66 | loglike2 = -3.626517 67 | fitted2 = np.array([0.6491366, 0.3117840, 0.2911631, 68 | 0.8086407, 0.3013733, 0.6379023]) 69 | se2 = np.array([8.5373779, 0.4595191, 0.8106499]) 70 | 71 | assert_equal(causal.propensity['lin'], lin2) 72 | assert_equal(causal.propensity['qua'], qua2) 73 | assert np.allclose(causal.propensity['coef'], coef2) 74 | assert np.allclose(causal.propensity['loglike'], loglike2) 75 | assert np.allclose(causal.propensity['fitted'], fitted2) 76 | assert np.allclose(causal.propensity['se'], se2) 77 | assert np.allclose(causal.raw_data['pscore'], fitted2) 78 | 79 | 80 | def test_est_via_ols(): 81 | 82 | Y = 
np.array([52, 30, 5, 29, 12, 10, 44, 87]) 83 | D = np.array([0, 0, 0, 0, 1, 1, 1, 1]) 84 | X = np.array([[1, 42], [3, 32], [9, 7], [12, 86], 85 | [5, 94], [4, 36], [2, 13], [6, 61]]) 86 | causal = c.CausalModel(Y, D, X) 87 | 88 | adj1 = 0 89 | causal.est_via_ols(adj1) 90 | ate1 = 9.25 91 | ate_se1 = 17.68253 92 | keys1 = {'ate', 'ate_se'} 93 | assert np.allclose(causal.estimates['ols']['ate'], ate1) 94 | assert np.allclose(causal.estimates['ols']['ate_se'], ate_se1) 95 | assert_equal(set(causal.estimates['ols'].keys()), keys1) 96 | 97 | adj2 = 1 98 | causal.est_via_ols(adj2) 99 | ate2 = 3.654552 100 | ate_se2 = 17.749993 101 | keys2 = {'ate', 'ate_se'} 102 | assert np.allclose(causal.estimates['ols']['ate'], ate2) 103 | assert np.allclose(causal.estimates['ols']['ate_se'], ate_se2) 104 | assert_equal(set(causal.estimates['ols'].keys()), keys2) 105 | 106 | adj3 = 2 107 | causal.est_via_ols(adj3) 108 | ate3 = 30.59444 109 | atc3 = 63.2095 110 | att3 = -2.020611 111 | ate_se3 = 19.91887865 112 | atc_se3 = 29.92152 113 | att_se3 = 11.8586 114 | keys3 = {'ate', 'atc', 'att', 'ate_se', 'atc_se', 'att_se'} 115 | assert np.allclose(causal.estimates['ols']['ate'], ate3) 116 | assert np.allclose(causal.estimates['ols']['atc'], atc3) 117 | assert np.allclose(causal.estimates['ols']['att'], att3) 118 | assert np.allclose(causal.estimates['ols']['ate_se'], ate_se3) 119 | assert np.allclose(causal.estimates['ols']['atc_se'], atc_se3) 120 | assert np.allclose(causal.estimates['ols']['att_se'], att_se3) 121 | assert_equal(set(causal.estimates['ols'].keys()), keys3) 122 | 123 | 124 | def test_parse_lin_terms(): 125 | 126 | K1 = 4 127 | lin1 = None 128 | ans1 = [] 129 | assert_equal(c.parse_lin_terms(K1, lin1), ans1) 130 | 131 | K2 = 2 132 | lin2 = 'all' 133 | ans2 = [0, 1] 134 | assert_equal(c.parse_lin_terms(K2, lin2), ans2) 135 | 136 | K3 = 2 137 | lin3 = [1] 138 | ans3 = [1] 139 | assert_equal(c.parse_lin_terms(K3, lin3), ans3) 140 | 141 | K4 = 2 142 | lin4 = [] 143 | ans4 = [] 144 | assert_equal(c.parse_lin_terms(K4, lin4), ans4) 145 | 146 | 147 | def test_parse_qua_terms(): 148 | 149 | K1 = 3 150 | qua1 = None 151 | ans1 = [] 152 | assert_equal(c.parse_qua_terms(K1, qua1), ans1) 153 | 154 | K2 = 2 155 | qua2 = 'all' 156 | ans2 = [(0, 0), (0, 1), (1, 1)] 157 | assert_equal(c.parse_qua_terms(K2, qua2), ans2) 158 | 159 | K3 = 2 160 | qua3 = [(0, 1)] 161 | ans3 = [(0, 1)] 162 | assert_equal(c.parse_qua_terms(K3, qua3), ans3) 163 | 164 | K4 = 2 165 | qua4 = [] 166 | ans4 = [] 167 | assert_equal(c.parse_qua_terms(K4, qua4), ans4) 168 | 169 | 170 | def test_split_equal_bins(): 171 | 172 | pscore = np.array([0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 173 | 0.6, 0.7, 0.8, 0.9, 0.95]) 174 | blocks = 5 175 | ans = [0, 0.2, 0.4, 0.6, 0.8, 1] 176 | 177 | assert_equal(c.split_equal_bins(pscore, blocks), ans) 178 | 179 | 180 | def test_sumlessthan(): 181 | 182 | g1 = np.array([3, 1, 2, 4, 3, 3]) 183 | sg1 = np.array([1, 2, 3, 3, 3, 4]) 184 | cs11 = np.array([1, 2, 3, 4, 5, 6]) 185 | csg1 = np.array([1, 3, 6, 9, 12, 16]) 186 | 187 | ans1 = np.array([5, 1, 2, 6, 5, 5]) 188 | ans2 = np.array([12, 1, 3, 16, 12, 12]) 189 | assert np.array_equal(c.sumlessthan(g1, sg1, cs11), ans1) 190 | assert np.array_equal(c.sumlessthan(g1, sg1, csg1), ans2) 191 | 192 | g2 = np.array([22, 4, 6, 4, 25, 5]) 193 | sg2 = np.array([4, 4, 5, 6, 22, 25]) 194 | cs12 = np.array([1, 2, 3, 4, 5, 6]) 195 | csg2 = np.array([4, 8, 13, 19, 41, 66]) 196 | 197 | ans3 = np.array([5, 2, 4, 2, 6, 3]) 198 | ans4 = np.array([41, 8, 19, 8, 66, 13]) 199 | assert 
np.array_equal(c.sumlessthan(g2, sg2, cs12), ans3) 200 | assert np.array_equal(c.sumlessthan(g2, sg2, csg2), ans4) 201 | 202 | 203 | def test_select_cutoff(): 204 | 205 | g1 = np.array([3, 1, 2, 4, 3, 3]) 206 | ans1 = 0 207 | assert_equal(c.select_cutoff(g1), ans1) 208 | 209 | g2 = np.array([22, 4, 6, 4, 25, 5]) 210 | ans2 = 0.2113248654 211 | assert np.allclose(c.select_cutoff(g2), ans2) 212 | 213 | 214 | def test_calc_tstat(): 215 | 216 | sample1 = np.array([1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 217 | 3, 3, 3, 3, 3, 3, 4, 4, 4, 5]) 218 | sample2 = np.array([5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 219 | 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2]) 220 | ans = 3.632233 221 | 222 | assert np.allclose(c.calc_tstat(sample1, sample2), ans) 223 | 224 | 225 | def test_calc_sample_sizes(): 226 | 227 | D1 = np.array([0, 1, 0, 1, 0, 1]) 228 | ans1 = (2, 1, 1, 2) 229 | assert_equal(c.calc_sample_sizes(D1), ans1) 230 | 231 | D2 = np.array([0, 1, 0, 1, 0]) 232 | ans2 = (1, 1, 2, 1) 233 | assert_equal(c.calc_sample_sizes(D2), ans2) 234 | 235 | D3 = np.array([1, 1, 1, 1, 1, 1]) 236 | ans3 = (0, 3, 0, 3) 237 | assert_equal(c.calc_sample_sizes(D3), ans3) 238 | 239 | D4 = np.array([0, 0, 0]) 240 | ans4 = (1, 0, 2, 0) 241 | assert_equal(c.calc_sample_sizes(D4), ans4) 242 | 243 | 244 | def test_select_blocks(): 245 | 246 | pscore1 = np.array([0.05, 0.06, 0.3, 0.4, 0.5, 0.6, 0.7, 0.95, 0.95]) 247 | D1 = np.array([0, 0, 1, 1, 0, 0, 1, 1, 1]) 248 | logodds1 = np.log(pscore1 / (1-pscore1)) 249 | K1 = 1 250 | ans1 = np.array([0.05, 0.5, 0.5, 0.95]) 251 | test1 = np.array(c.select_blocks(pscore1, logodds1, D1, K1, 0, 1)) 252 | assert np.allclose(test1, ans1) 253 | 254 | pscore2 = np.array([0.05, 0.06, 0.3, 0.4, 0.5, 0.6, 0.7, 0.95, 0.95]) 255 | D2 = np.array([0, 0, 1, 1, 0, 0, 1, 1, 1]) 256 | logodds2 = np.log(pscore1 / (1-pscore1)) 257 | K2 = 2 258 | ans2 = np.array([0, 1]) 259 | test2 = np.array(c.select_blocks(pscore2, logodds2, D2, K2, 0, 1)) 260 | assert np.allclose(test2, ans2) 261 | 262 | -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.core.data as d 5 | 6 | 7 | def test_preprocess(): 8 | 9 | Y1 = np.array([[1.2], [3.45], [-6], [78.90]]) 10 | D1 = np.array([[0], [1], [0.0], [1]]) 11 | X1 = np.array([-1, 3, -5.6, 8.9]) 12 | Y_out, D_out, X_out = d.preprocess(Y1, D1, X1) 13 | 14 | ans1 = np.array([1.2, 3.45, -6, 78.9]) 15 | assert np.array_equal(Y_out, ans1) 16 | 17 | ans2 = np.array([0, 1.0, -0.0, 1]) 18 | assert np.array_equal(D_out, ans2) 19 | 20 | ans3 = np.array([[-1], [3], [-5.6], [8.9]]) 21 | assert np.array_equal(X_out, ans3) 22 | 23 | 24 | Y2 = np.array([3, 98]) 25 | D2 = np.array([[5], [21.9], [-53]]) 26 | X2 = np.array([1, 3.14]) 27 | assert_raises(IndexError, d.preprocess, Y2, D2, X2) 28 | 29 | 30 | def test_data(): 31 | 32 | Y1 = np.array([1.2, 3.45, -6, 78.90, -9, 8.7654]) 33 | D1 = np.array([0, 1, 0, 1.0, 0.0, 1]) 34 | X1 = np.array([[-1, 2], [3, -4], [-5.6, -7], [8.9, 0.0], [99, 877], [-666, 54321]]) 35 | data = d.Data(Y1, D1, X1) 36 | 37 | ans1 = np.array([1.2, 3.45, -6, 78.9, -9, 8.7654]) 38 | assert np.array_equal(data['Y'], ans1) 39 | 40 | ans2 = np.array([0, 1, 0, 1, 0, 1]) 41 | assert np.array_equal(data['D'], ans2) 42 | 43 | ans3 = np.array([[-1, 2], [3, -4], [-5.6, -7], [8.9, 0], [99, 877], [-666, 54321]]) 44 | assert np.array_equal(data['X'], ans3) 45 | 46 | ans4 = 6 47 | assert_equal(data['N'], 
ans4) 48 | 49 | ans5 = 2 50 | assert_equal(data['K'], ans5) 51 | 52 | ans6 = np.array([True, False, True, False, True, False]) 53 | assert np.array_equal(data['controls'], ans6) 54 | 55 | ans7 = np.array([False, True, False, True, False, True]) 56 | assert np.array_equal(data['treated'], ans7) 57 | 58 | ans8 = np.array([1.2, -6, -9]) 59 | assert np.array_equal(data['Y_c'], ans8) 60 | 61 | ans9 = np.array([3.45, 78.9, 8.7654]) 62 | assert np.array_equal(data['Y_t'], ans9) 63 | 64 | ans10 = np.array([[-1, 2], [-5.6, -7], [99, 877]]) 65 | assert np.array_equal(data['X_c'], ans10) 66 | 67 | ans11 = np.array([[3, -4], [8.9, 0], [-666, 54321]]) 68 | assert np.array_equal(data['X_t'], ans11) 69 | 70 | ans12 = 3 71 | assert_equal(data['N_t'], ans12) 72 | 73 | ans13 = 3 74 | assert_equal(data['N_c'], ans13) 75 | 76 | ans14 = 'int' 77 | assert_equal(data['D'].dtype, ans14) 78 | 79 | ans15 = {'Y', 'D', 'X', 'N', 'K', 'controls', 'treated', 80 | 'Y_c', 'Y_t', 'X_c', 'X_t', 'N_c', 'N_t'} 81 | assert_equal(set(data.keys()), ans15) 82 | 83 | Y2 = np.array([[1.2], [3.45], [-6], [78.90]]) 84 | D2 = np.array([[0], [1], [0.0], [1]]) 85 | X2 = np.array([[-1, 2], [3, -4], [-5.6, -7], [8.9, 0.0]]) 86 | assert_raises(ValueError, d.Data, Y2, D2, X2) 87 | 88 | -------------------------------------------------------------------------------- /tests/test_matching.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from nose.tools import * 3 | import numpy as np 4 | 5 | import causalinference.estimators.matching as m 6 | 7 | 8 | def test_norm(): 9 | 10 | X_i = np.array([1, 7, 3]) 11 | X_m = np.array([[4, 2, 5], [9, 8, 6]]) 12 | 13 | W1 = np.array([0.5, 1, 0.25]) 14 | ans1 = np.array([30.5, 35.25]) 15 | assert np.array_equal(m.norm(X_i, X_m, W1), ans1) 16 | 17 | W2 = np.array([[0.5, -0.1, 0.7], [-0.1, 1, 3], [0.7, 3, 0.25]]) 18 | ans2 = np.array([-18.1, 85.25]) 19 | assert np.array_equal(m.norm(X_i, X_m, W2), ans2) 20 | 21 | 22 | def test_smallestm(): 23 | 24 | d1 = np.array([1, 3, 2]) 25 | m1 = 1 26 | ans1 = np.array([0]) 27 | assert_equal(set(m.smallestm(d1, m1)), set(ans1)) 28 | 29 | d2 = np.array([1, 3, 2]) 30 | m2 = 2 31 | ans2 = np.array([0, 2]) 32 | assert_equal(set(m.smallestm(d2, m2)), set(ans2)) 33 | 34 | d3 = np.array([9, 2, 5, 9, 1, 2, 7]) 35 | m3 = 1 36 | ans3 = np.array([4]) 37 | assert_equal(set(m.smallestm(d3, m3)), set(ans3)) 38 | 39 | d4 = np.array([9, 2, 5, 9, 1, 2, 7]) 40 | m4 = 2 41 | ans4 = np.array([4, 1, 5]) 42 | assert_equal(set(m.smallestm(d4, m4)), set(ans4)) 43 | 44 | d5 = np.array([9, 2, 5, 9, 1, 2, 7]) 45 | m5 = 3 46 | ans5 = np.array([4, 1, 5]) 47 | assert_equal(set(m.smallestm(d5, m5)), set(ans5)) 48 | 49 | d6 = np.array([9, 2, 5, 9, 1, 2, 7]) 50 | m6 = 4 51 | ans6 = np.array([4, 1, 5, 2]) 52 | assert_equal(set(m.smallestm(d6, m6)), set(ans6)) 53 | 54 | d7 = np.array([-3.2, -3.2, 9.66, -3.2, 28.4]) 55 | m7 = 1 56 | ans7 = np.array([0, 1, 3]) 57 | assert_equal(set(m.smallestm(d7, m7)), set(ans7)) 58 | 59 | 60 | def test_match(): 61 | 62 | X_i = np.array([1, 7, 3]) 63 | X_m = np.array([[9, 8, 6], [4, 2, 5]]) 64 | 65 | W1 = np.array([0.5, 1, 0.25]) 66 | m1 = 1 67 | ans1 = np.array([1]) 68 | assert_equal(set(m.match(X_i, X_m, W1, m1)), set(ans1)) 69 | 70 | W2 = np.array([[0.5, -0.1, 0.7], [-0.1, 1, 3], [0.7, 3, 0.25]]) 71 | m2 = 1 72 | ans2 = np.array([1]) 73 | assert_equal(set(m.match(X_i, X_m, W2, m2)), set(ans2)) 74 | 75 | 76 | def test_bias_coefs(): 77 | 78 | Y_m = np.array([4, 2, 5, 2]) 79 | X_m = np.array([[7, 6], 
[5, 4], [2, 3], [3, 5]]) 80 | matches = [np.array([1, 0, 2]), np.array([1, 2]), 81 | np.array([2, 0]), np.array([0]), np.array([0, 1])] 82 | 83 | ans = np.array([-2, 3]) 84 | assert np.allclose(m.bias_coefs(matches, Y_m, X_m), ans) 85 | 86 | 87 | def test_bias(): 88 | 89 | X = np.array([[1, 2, 3], [-3, -2, -1]]) 90 | X_m = np.array([[4, 2, 6], [5, 7, 3], [9, 4, 1]]) 91 | matches = [np.array([0, 1, 2]), np.array([1])] 92 | coefs = np.array([-2, 0, 3]) 93 | 94 | ans = np.array([-9, -4]) 95 | assert np.allclose(m.bias(X, X_m, matches, coefs), ans) 96 | 97 | 98 | def test_scaled_counts(): 99 | 100 | N = 10 101 | matches = [np.array([3, 0, 1]), np.array([7]), np.array([1, 9])] 102 | 103 | ans = np.array([1/3, 1/3+1/2, 0, 1/3, 0, 0, 0, 1, 0, 1/2]) 104 | assert np.allclose(m.scaled_counts(N, matches), ans) 105 | 106 | 107 | def test_calc_atx_var(): 108 | 109 | vars_c = np.array([1, 2]) 110 | vars_t = np.array([0.5, 1, 0.25]) 111 | weights_c = np.array([1.5, 0.5]) 112 | weights_t = np.array([1, 1, 1]) 113 | 114 | out_var = m.calc_atx_var(vars_c, vars_t, weights_c, weights_t) 115 | ans = 0.8819444 116 | assert np.allclose(out_var, ans) 117 | 118 | 119 | def test_calc_atc_se(): 120 | 121 | vars_c = np.array([1, 2]) 122 | vars_t = np.array([0.5, 1, 0.25]) 123 | scaled_counts_t = np.array([1, 1, 0]) 124 | 125 | out_se = m.calc_atc_se(vars_c, vars_t, scaled_counts_t) 126 | ans = 1.0606602 127 | assert np.allclose(out_se, ans) 128 | 129 | 130 | def test_calc_att_se(): 131 | 132 | vars_c = np.array([1, 2]) 133 | vars_t = np.array([0.5, 1, 0.25]) 134 | scaled_counts_c = np.array([1, 2]) 135 | 136 | out_se = m.calc_att_se(vars_c, vars_t, scaled_counts_c) 137 | ans = 1.0929064 138 | assert np.allclose(out_se, ans) 139 | 140 | 141 | def test_calc_ate_se(): 142 | 143 | vars_c = np.array([1, 2]) 144 | vars_t = np.array([0.5, 1, 0.25]) 145 | scaled_counts_c = np.array([1, 2]) 146 | scaled_counts_t = np.array([1, 1, 0]) 147 | 148 | out_se = m.calc_ate_se(vars_c, vars_t, scaled_counts_c, scaled_counts_t) 149 | ans = 1.0630146 150 | assert np.allclose(out_se, ans) 151 | 152 | -------------------------------------------------------------------------------- /tests/test_ols.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.estimators.ols as o 5 | import causalinference.core.data as d 6 | 7 | 8 | def test_form_matrix(): 9 | 10 | D = np.array([0, 1, 0, 1]) 11 | X = np.array([[1], [2], [3], [4]]) 12 | 13 | adj1 = 0 14 | ans1 = np.array([[1, 0], [1, 1], [1, 0], [1, 1]]) 15 | assert np.array_equal(o.form_matrix(D, X, adj1), ans1) 16 | 17 | adj2 = 1 18 | ans2 = np.array([[1, 0, -1.5], [1, 1, -0.5], 19 | [1, 0, 0.5], [1, 1, 1.5]]) 20 | assert np.array_equal(o.form_matrix(D, X, adj2), ans2) 21 | 22 | adj3 = 2 23 | ans3 = np.array([[1, 0, -1.5, 0], [1, 1, -0.5, -0.5], 24 | [1, 0, 0.5, 0], [1, 1, 1.5, 1.5]]) 25 | assert np.array_equal(o.form_matrix(D, X, adj3), ans3) 26 | 27 | 28 | def test_calc_ate(): 29 | 30 | olscoef = np.array([1, 2, 3, 4]) 31 | ans = 2 32 | 33 | assert_equal(o.calc_ate(olscoef), ans) 34 | 35 | 36 | def test_calc_atx(): 37 | 38 | olscoef = np.array([1, 2, 3, 4, 5, 6]) 39 | meandiff = np.array([7, 8]) 40 | ans = 85 41 | 42 | assert_equal(o.calc_atx(olscoef, meandiff), ans) 43 | 44 | 45 | def test_calc_cov(): 46 | 47 | Z = np.array([[4, 4, 4, 2, 1, 3], [4, 2, 2, 6, 2, 2], 48 | [3, 4, 2, 1, 3, 1], [2, 3, 0, 0, 1, 2], 49 | [4, 3, 2, 1, 4, 2], [2, 5, 4, 2, 2, 0]]) 50 | u = np.array([1, 3, 6, 4, 
3, 1]) 51 | ans = np.array([[434.755102, 8.442177, -87.529252, 52 | -77.227211, -204.360544, -354.38095], 53 | [8.442177, 1.988662, -3.601814, 54 | -1.224943, -4.913832, -6.68254], 55 | [-87.529252, -3.601814, 19.817710, 56 | 15.136009, 41.933787, 71.05079], 57 | [-77.227211, -1.224943, 15.136009, 58 | 14.185125, 35.989569, 62.89841], 59 | [-204.360544, -4.913831, 41.933787, 60 | 35.989569, 97.145125, 166.58730], 61 | [-354.380952, -6.682540, 71.050794, 62 | 62.898413, 166.587302, 289.11111]]) 63 | 64 | assert np.allclose(o.calc_cov(Z, u), ans) 65 | 66 | 67 | def test_submatrix(): 68 | 69 | cov = np.array([[1, 2, 3, 4, 5, 6], [7, 9, 8, 9, 8, 7], 70 | [1, 2, 3, 4, 5, 6], [7, 8, 9, 1, 2, 3], 71 | [4, 6, 5, 6, 5, 4], [7, 3, 8, 9, 2, 1]]) 72 | ans = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]]) 73 | 74 | assert np.allclose(o.submatrix(cov), ans) 75 | 76 | 77 | def test_calc_ate_se(): 78 | 79 | subcov = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]]) 80 | ans = np.sqrt(5) 81 | 82 | assert_equal(o.calc_ate_se(subcov), ans) 83 | 84 | 85 | def test_calc_atx_se(): 86 | 87 | cov = np.array([[1, 2, 3, 4, 5, 6], [7, 9, 8, 9, 8, 7], 88 | [1, 2, 3, 4, 5, 6], [7, 8, 9, 1, 2, 3], 89 | [4, 6, 5, 6, 5, 4], [7, 3, 8, 9, 2, 1]]) 90 | meandiff = np.array([3, 7]) 91 | ans = 18.46619 92 | 93 | assert np.allclose(o.calc_atx_se(cov, meandiff), ans) 94 | 95 | 96 | def test_ols(): 97 | 98 | Y = np.array([52, 30, 5, 29, 12, 10, 44, 87]) 99 | D = np.array([0, 0, 0, 0, 1, 1, 1, 1]) 100 | X = np.array([[1, 42], [3, 32], [9, 7], [12, 86], 101 | [5, 94], [4, 36], [2, 13], [6, 61]]) 102 | data = d.Data(Y, D, X) 103 | 104 | adj1 = 0 105 | ols1 = o.OLS(data, adj1) 106 | ate1 = 9.25 107 | ate_se1 = 17.68253 108 | keys1 = {'ate', 'ate_se'} 109 | assert np.allclose(ols1['ate'], ate1) 110 | assert np.allclose(ols1['ate_se'], ate_se1) 111 | assert_equal(set(ols1.keys()), keys1) 112 | 113 | adj2 = 1 114 | ols2 = o.OLS(data, adj2) 115 | ate2 = 3.654552 116 | ate_se2 = 17.749993 117 | keys2 = {'ate', 'ate_se'} 118 | assert np.allclose(ols2['ate'], ate2) 119 | assert np.allclose(ols2['ate_se'], ate_se2) 120 | assert_equal(set(ols2.keys()), keys2) 121 | 122 | adj3 = 2 123 | ols3 = o.OLS(data, adj3) 124 | ate3 = 30.59444 125 | atc3 = 63.2095 126 | att3 = -2.020611 127 | ate_se3 = 19.91887865 128 | atc_se3 = 29.92152 129 | att_se3 = 11.8586 130 | keys3 = {'ate', 'atc', 'att', 'ate_se', 'atc_se', 'att_se'} 131 | assert np.allclose(ols3['ate'], ate3) 132 | assert np.allclose(ols3['atc'], atc3) 133 | assert np.allclose(ols3['att'], att3) 134 | assert np.allclose(ols3['ate_se'], ate_se3) 135 | assert np.allclose(ols3['atc_se'], atc_se3) 136 | assert np.allclose(ols3['att_se'], att_se3) 137 | assert_equal(set(ols3.keys()), keys3) 138 | 139 | -------------------------------------------------------------------------------- /tests/test_propensity.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.core.data as d 5 | import causalinference.core.propensity as p 6 | from utils import random_data 7 | 8 | 9 | def test_form_matrix(): 10 | 11 | X = np.array([[1, 3], [5, 7], [8, 6], [4, 2]]) 12 | 13 | ans0 = np.column_stack((np.ones(4), X)) 14 | assert np.array_equal(p.form_matrix(X, [0, 1], []), ans0) 15 | 16 | lin1 = [0] 17 | qua1 = [(0, 1), (1, 1)] 18 | ans1 = np.array([[1, 1, 3, 9], [1, 5, 35, 49], 19 | [1, 8, 48, 36], [1, 4, 8, 4]]) 20 | assert np.array_equal(p.form_matrix(X, lin1, qua1), ans1) 21 | 22 | lin2 = [0] 23 | qua2 = [(1, 0), (1, 
1)] 24 | ans2 = np.array([[1, 1, 3, 9], [1, 5, 35, 49], 25 | [1, 8, 48, 36], [1, 4, 8, 4]]) 26 | assert np.array_equal(p.form_matrix(X, lin2, qua2), ans2) 27 | 28 | lin3 = [0, 1] 29 | qua3 = [(0, 0)] 30 | ans3 = np.array([[1, 1, 3, 1], [1, 5, 7, 25], 31 | [1, 8, 6, 64], [1, 4, 2, 16]]) 32 | assert np.array_equal(p.form_matrix(X, lin3, qua3), ans3) 33 | 34 | 35 | def test_sigmoid(): 36 | 37 | x = np.array([0, 10000, -10000, 5]) 38 | ans = np.array([0.5, 1.0, 0.0, 1/(1+np.exp(-5))]) 39 | assert np.array_equal(p.sigmoid(x), ans) 40 | 41 | 42 | def test_log1exp(): 43 | 44 | x = np.array([0, 10000, -10000, 5]) 45 | ans = np.array([np.log(2), 0.0, 10000, np.log(1+np.exp(-5))]) 46 | assert np.array_equal(p.log1exp(x), ans) 47 | 48 | 49 | def test_neg_loglike(): 50 | 51 | beta = np.array([1, 2]) 52 | X_c = np.array([[100, 50], [-2, 1], [-500, -1300], [1, 0]]) 53 | X_t = np.array([[0, 0], [50, 25], [-50, -75], [0, -0.5]]) 54 | ans = 2 * (200 + np.log(2) + np.log(1+np.e)) 55 | assert_equal(p.neg_loglike(beta, X_c, X_t), ans) 56 | 57 | 58 | def test_neg_gradient(): 59 | 60 | beta = np.array([2, -1]) 61 | X_c = np.array([[1, 2], [125, 50]]) 62 | X_t = np.array([[50, 0], [2.5, 4]]) 63 | ans = np.array([125.5 - 2.5/(1+np.e), 51 - 4/(1+np.e)]) 64 | assert np.array_equal(p.neg_gradient(beta, X_c, X_t), ans) 65 | 66 | 67 | def test_calc_coef(): 68 | 69 | X_c = np.array([[1, 1, 8], [1, 8, 5]]) 70 | X_t = np.array([[1, 10, 2], [1, 5, 8]]) 71 | ans = np.array([-6.9441137, 0.6608454, 0.4900669]) 72 | 73 | assert np.allclose(p.calc_coef(X_c, X_t), ans) 74 | 75 | 76 | def test_calc_se(): 77 | 78 | Z = np.array([[1, 64, 188], [1, 132, 59], [1, 106, 72], [1, 86, 154]]) 79 | phat = np.array([0.5101151, 0.3062871, 0.8566664, 0.3269315]) 80 | ans = np.array([25.56301220, 0.16572624, 0.07956535]) 81 | 82 | assert np.allclose(p.calc_se(Z, phat), ans) 83 | 84 | 85 | def test_propensity(): 86 | 87 | D = np.array([0, 0, 0, 1, 1, 1]) 88 | X = np.array([[7, 8], [3, 10], [7, 10], [4, 7], [5, 10], [9, 8]]) 89 | Y = random_data(D_cur=D, X_cur=X) 90 | 91 | data = d.Data(Y, D, X) 92 | propensity = p.Propensity(data, [0, 1], []) 93 | lin = [0, 1] 94 | qua = [] 95 | coef = np.array([6.8066090, -0.0244874, -0.7524939]) 96 | loglike = -3.626517 97 | fitted = np.array([0.6491366, 0.3117840, 0.2911631, 98 | 0.8086407, 0.3013733, 0.6379023]) 99 | se = np.array([8.5373779, 0.4595191, 0.8106499]) 100 | keys = {'lin', 'qua', 'coef', 'loglike', 'fitted', 'se'} 101 | 102 | assert_equal(propensity['lin'], lin) 103 | assert_equal(propensity['qua'], qua) 104 | assert np.allclose(propensity['coef'], coef) 105 | assert np.allclose(propensity['loglike'], loglike) 106 | assert np.allclose(propensity['fitted'], fitted) 107 | assert np.allclose(propensity['se'], se) 108 | assert_equal(set(propensity.keys()), keys) 109 | 110 | -------------------------------------------------------------------------------- /tests/test_propensityselect.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.core.data as d 5 | import causalinference.core.propensity as p 6 | from utils import random_data 7 | 8 | 9 | def test_get_excluded_lin(): 10 | 11 | K1 = 4 12 | included1 = [] 13 | ans1 = [0, 1, 2, 3] 14 | assert_equal(p.get_excluded_lin(K1, included1), ans1) 15 | 16 | K2 = 4 17 | included2 = [3, 1] 18 | ans2 = [0, 2] 19 | assert_equal(p.get_excluded_lin(K2, included2), ans2) 20 | 21 | K3 = 3 22 | included3 = [0, 1, 2] 23 | ans3 = [] 24 | 
assert_equal(p.get_excluded_lin(K3, included3), ans3) 25 | 26 | 27 | def test_get_excluded_qua(): 28 | 29 | lin1 = [0, 2, 3] 30 | qua1 = [(0, 3), (3, 3)] 31 | ans1 = [(0, 0), (0, 2), (2, 2), (2, 3)] 32 | assert_equal(p.get_excluded_qua(lin1, qua1), ans1) 33 | 34 | lin2 = [1, 2] 35 | qua2 = [] 36 | ans2 = [(1, 1), (1, 2), (2, 2)] 37 | assert_equal(p.get_excluded_qua(lin2, qua2), ans2) 38 | 39 | lin3 = [8, 5] 40 | qua3 = [(8, 8), (8, 5), (5, 5)] 41 | ans3 = [] 42 | assert_equal(p.get_excluded_qua(lin3, qua3), ans3) 43 | 44 | 45 | def test_calc_loglike(): 46 | 47 | X_c = np.array([[1, 2], [3, 7]]) 48 | X_t = np.array([[1, 4], [3, 6]]) 49 | lin = [1] 50 | qua = [(0, 0)] 51 | ans = -2.567814 52 | assert np.allclose(p.calc_loglike(X_c, X_t, lin, qua), ans) 53 | 54 | 55 | def test_select_lin(): 56 | 57 | Y, D, X = random_data(K=4) 58 | X_c_random, X_t_random = X[D==0], X[D==1] 59 | 60 | lin1 = [0, 1, 2, 3] 61 | C1 = np.random.rand(1) 62 | ans1 = [0, 1, 2, 3] 63 | assert_equal(p.select_lin(X_c_random, X_t_random, lin1, C1), ans1) 64 | 65 | X_c = np.array([[1, 2], [9, 7]]) 66 | X_t = np.array([[1, 4], [9, 6]]) 67 | 68 | lin2 = [] 69 | C2 = 0.07 70 | ans2 = [] 71 | assert_equal(p.select_lin(X_c, X_t, lin2, C2), ans2) 72 | 73 | lin3 = [] 74 | C3 = 0.06 75 | ans3 = [1, 0] 76 | assert_equal(p.select_lin(X_c, X_t, lin3, C3), ans3) 77 | 78 | lin4 = [1] 79 | C4 = 0.35 80 | ans4 = [1] 81 | assert_equal(p.select_lin(X_c, X_t, lin4, C4), ans4) 82 | 83 | lin5 = [1] 84 | C5 = 0.34 85 | ans5 = [1, 0] 86 | assert_equal(p.select_lin(X_c, X_t, lin5, C5), ans5) 87 | 88 | 89 | def test_select_lin_terms(): 90 | 91 | Y, D, X = random_data(K=4) 92 | X_c_random, X_t_random = X[D==0], X[D==1] 93 | 94 | lin1 = [3, 0, 1] 95 | C1 = np.inf 96 | ans1 = [3, 0, 1] 97 | assert_equal(p.select_lin_terms(X_c_random, X_t_random, lin1, C1), ans1) 98 | 99 | lin2 = [2] 100 | C2 = 0 101 | ans2 = [2, 0, 1, 3] 102 | assert_equal(p.select_lin_terms(X_c_random, X_t_random, lin2, C2), ans2) 103 | 104 | lin3 = [] 105 | C3 = 0 106 | ans3 = [0, 1, 2, 3] 107 | assert_equal(p.select_lin_terms(X_c_random, X_t_random, lin3, C3), ans3) 108 | 109 | lin4 = [3, 1] 110 | C4 = -34.234 111 | ans4 = [3, 1, 0, 2] 112 | assert_equal(p.select_lin_terms(X_c_random, X_t_random, lin4, C4), ans4) 113 | 114 | X_c = np.array([[1, 2], [9, 7]]) 115 | X_t = np.array([[1, 4], [9, 7]]) 116 | 117 | lin5 = [] 118 | C5 = 0.06 119 | ans5 = [1, 0] 120 | assert_equal(p.select_lin_terms(X_c, X_t, lin5, C5), ans5) 121 | 122 | 123 | def test_select_qua(): 124 | 125 | Y, D, X = random_data() 126 | X_c_random, X_t_random = X[D==0], X[D==1] 127 | 128 | lin1 = [1, 0] 129 | qua1 = [(1, 0), (0, 0), (1, 1)] 130 | C1 = np.random.rand(1) 131 | ans1 = [(1, 0), (0, 0), (1, 1)] 132 | assert_equal(p.select_qua(X_c_random, X_t_random, lin1, qua1, C1), ans1) 133 | 134 | lin2 = [1] 135 | qua2 = [(1, 1)] 136 | C2 = np.random.rand(1) 137 | ans2 = [(1, 1)] 138 | assert_equal(p.select_qua(X_c_random, X_t_random, lin2, qua2, C2), ans2) 139 | 140 | X_c = np.array([[7, 8], [3, 10], [7, 10]]) 141 | X_t = np.array([[4, 7], [5, 10], [9, 8]]) 142 | 143 | lin3 = [0, 1] 144 | qua3 = [] 145 | C3 = 1.2 146 | ans3 = [] 147 | assert_equal(p.select_qua(X_c, X_t, lin3, qua3, C3), ans3) 148 | 149 | lin4 = [0, 1] 150 | qua4 = [] 151 | C4 = 1.1 152 | ans4 = [(1, 1), (0, 1), (0, 0)] 153 | assert_equal(p.select_qua(X_c, X_t, lin4, qua4, C4), ans4) 154 | 155 | lin5 = [0, 1] 156 | qua5 = [(1, 1)] 157 | C5 = 2.4 158 | ans5 = [(1, 1)] 159 | assert_equal(p.select_qua(X_c, X_t, lin5, qua5, C5), ans5) 160 | 161 | lin6 = [0, 
1] 162 | qua6 = [(1, 1)] 163 | C6 = 2.3 164 | ans6 = [(1, 1), (0, 1), (0, 0)] 165 | assert_equal(p.select_qua(X_c, X_t, lin6, qua6, C6), ans6) 166 | 167 | lin7 = [0, 1] 168 | qua7 = [(1, 1), (0, 1)] 169 | C7 = 3.9 170 | ans7 = [(1, 1), (0, 1)] 171 | assert_equal(p.select_qua(X_c, X_t, lin7, qua7, C7), ans7) 172 | 173 | lin8 = [0, 1] 174 | qua8 = [(1, 1), (0, 1)] 175 | C8 = 3.8 176 | ans8 = [(1, 1), (0, 1), (0, 0)] 177 | assert_equal(p.select_qua(X_c, X_t, lin8, qua8, C8), ans8) 178 | 179 | 180 | def test_select_qua_terms(): 181 | 182 | Y, D, X = random_data() 183 | X_c_random, X_t_random = X[D==0], X[D==1] 184 | 185 | lin1 = [0, 1] 186 | C1 = np.inf 187 | ans1 = [] 188 | assert_equal(p.select_qua_terms(X_c_random, X_t_random, lin1, C1), ans1) 189 | 190 | lin2 = [1, 0] 191 | C2 = 0 192 | ans2 = [(1, 1), (1, 0), (0, 0)] 193 | assert_equal(p.select_qua_terms(X_c_random, X_t_random, lin2, C2), ans2) 194 | 195 | lin3 = [0] 196 | C3 = -983.340 197 | ans3 = [(0, 0)] 198 | assert_equal(p.select_qua_terms(X_c_random, X_t_random, lin3, C3), ans3) 199 | 200 | lin4 = [] 201 | C4 = 34.234 202 | ans4 = [] 203 | assert_equal(p.select_qua_terms(X_c_random, X_t_random, lin4, C4), ans4) 204 | 205 | X_c = np.array([[7, 8], [3, 10], [7, 10]]) 206 | X_t = np.array([[4, 7], [5, 10], [9, 8]]) 207 | 208 | lin5 = [0, 1] 209 | C5 = 1.1 210 | ans5 = [(1, 1), (0, 1), (0, 0)] 211 | assert_equal(p.select_qua_terms(X_c, X_t, lin5, C5), ans5) 212 | 213 | 214 | def test_propensityselect(): 215 | 216 | D = np.array([0, 0, 0, 1, 1, 1]) 217 | X = np.array([[7, 8], [3, 10], [7, 10], [4, 7], [5, 10], [9, 8]]) 218 | Y = random_data(D_cur=D, X_cur=X) 219 | data = d.Data(Y, D, X) 220 | 221 | propensity1 = p.PropensitySelect(data, [], 1, 2.71) 222 | lin1 = [1] 223 | qua1 = [] 224 | coef1 = np.array([6.5424027, -0.7392041]) 225 | loglike1 = -3.627939 226 | fitted1 = np.array([0.6522105, 0.2995088, 0.2995088, 227 | 0.7970526, 0.2995088, 0.6522105]) 228 | se1 = np.array([6.8455179, 0.7641445]) 229 | keys = {'lin', 'qua', 'coef', 'loglike', 'fitted', 'se'} 230 | 231 | assert_equal(propensity1['lin'], lin1) 232 | assert_equal(propensity1['qua'], qua1) 233 | assert np.allclose(propensity1['coef'], coef1) 234 | assert np.allclose(propensity1['loglike'], loglike1) 235 | assert np.allclose(propensity1['fitted'], fitted1) 236 | assert np.allclose(propensity1['se'], se1) 237 | assert_equal(set(propensity1.keys()), keys) 238 | 239 | 240 | propensity2 = p.PropensitySelect(data, [0, 1], 1, 2.71) 241 | lin2 = [0, 1] 242 | qua2 = [] 243 | coef2 = np.array([6.8066090, -0.0244874, -0.7524939]) 244 | loglike2 = -3.626517 245 | fitted2 = np.array([0.6491366, 0.3117840, 0.2911631, 246 | 0.8086407, 0.3013733, 0.6379023]) 247 | se2 = np.array([8.5373779, 0.4595191, 0.8106499]) 248 | 249 | assert_equal(propensity2['lin'], lin2) 250 | assert_equal(propensity2['qua'], qua2) 251 | assert np.allclose(propensity2['coef'], coef2) 252 | assert np.allclose(propensity2['loglike'], loglike2) 253 | assert np.allclose(propensity2['fitted'], fitted2) 254 | assert np.allclose(propensity2['se'], se2) 255 | 256 | -------------------------------------------------------------------------------- /tests/test_summary.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.core.data as d 5 | import causalinference.core.summary as s 6 | 7 | 8 | def test_calc_ndiff(): 9 | 10 | ans = -1/np.sqrt(2.5) 11 | assert_equal(s.calc_ndiff(4, 3, 2, 1), ans) 12 | 13 | 14 | def 
test_summary(): 15 | 16 | Y = np.array([1, 2, 3, 4, 6, 5]) 17 | D = np.array([0, 0, 1, 1, 0, 1]) 18 | X = np.array([[1, 3], [5, 7], [8, 6], [4, 2], [9, 11], [12, 10]]) 19 | data = d.Data(Y, D, X) 20 | summary = s.Summary(data) 21 | 22 | N = 6 23 | K = 2 24 | N_c = 3 25 | N_t = 3 26 | Y_c_mean = 3 27 | Y_t_mean = 4 28 | Y_c_sd = np.sqrt(7) 29 | Y_t_sd = 1 30 | rdiff = 1 31 | X_c_mean = np.array([5, 7]) 32 | X_t_mean = np.array([8, 6]) 33 | X_c_sd = np.array([4, 4]) 34 | X_t_sd = np.array([4, 4]) 35 | ndiff = np.array([0.75, -0.25]) 36 | keys1 = {'N', 'K', 'N_c', 'N_t', 'Y_c_mean', 'Y_t_mean', 'Y_c_sd', 'Y_t_sd', 37 | 'X_c_mean', 'X_t_mean', 'X_c_sd', 'X_t_sd', 'rdiff', 'ndiff'} 38 | 39 | assert_equal(summary['N'], N) 40 | assert_equal(summary['N_c'], N_c) 41 | assert_equal(summary['N_t'], N_t) 42 | assert_equal(summary['Y_c_mean'], Y_c_mean) 43 | assert_equal(summary['Y_t_mean'], Y_t_mean) 44 | assert_equal(summary['Y_c_sd'], Y_c_sd) 45 | assert_equal(summary['Y_t_sd'], Y_t_sd) 46 | assert_equal(summary['rdiff'], rdiff) 47 | assert np.array_equal(summary['X_c_mean'], X_c_mean) 48 | assert np.array_equal(summary['X_t_mean'], X_t_mean) 49 | assert np.array_equal(summary['X_c_sd'], X_c_sd) 50 | assert np.array_equal(summary['X_t_sd'], X_t_sd) 51 | assert np.array_equal(summary['ndiff'], ndiff) 52 | assert_equal(set(summary.keys()), keys1) 53 | 54 | p_c = np.array([0.3, 0.5, 0.7]) 55 | p_t = np.array([0.1, 0.5, 0.9]) 56 | summary._summarize_pscore(p_c, p_t) 57 | keys2 = {'N', 'K', 'N_c', 'N_t', 'Y_c_mean', 'Y_t_mean', 'Y_c_sd', 'Y_t_sd', 58 | 'X_c_mean', 'X_t_mean', 'X_c_sd', 'X_t_sd', 'rdiff', 'ndiff', 59 | 'p_min', 'p_max', 'p_c_mean', 'p_t_mean'} 60 | 61 | assert_equal(summary['p_min'], 0.1) 62 | assert_equal(summary['p_max'], 0.9) 63 | assert_equal(summary['p_c_mean'], 0.5) 64 | assert_equal(summary['p_t_mean'], 0.5) 65 | assert_equal(set(summary.keys()), keys2) 66 | 67 | -------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.utils.tools as t 5 | 6 | 7 | def test_convert_to_formatting(): 8 | 9 | entry_types = ['string', 'float', 'integer', 'float'] 10 | ans = ['s', '.3f', '.0f', '.3f'] 11 | 12 | assert_equal(list(t.convert_to_formatting(entry_types)), ans) 13 | 14 | 15 | def test_add_row(): 16 | 17 | entries1 = ('Variable', 'Mean', 'S.d.', 'Mean', 'S.d.', 'Raw diff') 18 | entry_types1 = ['string']*6 19 | col_spans1 = [1]*6 20 | width1 = 80 21 | ans1 = ' Variable Mean S.d. Mean S.d. 
Raw diff\n' 22 | assert_equal(t.add_row(entries1, entry_types1, col_spans1, width1), ans1) 23 | 24 | entries2 = [12, 13.2, -3.14, 9.8765] 25 | entry_types2 = ['integer', 'integer', 'float', 'float'] 26 | col_spans2 = [1, 2, 2, 1] 27 | width2 = 80 28 | ans2 = ' 12 13 -3.140 9.877\n' 29 | assert_equal(t.add_row(entries2, entry_types2, col_spans2, width2), ans2) 30 | 31 | 32 | def test_add_line(): 33 | 34 | width = 30 35 | ans = '------------------------------\n' 36 | 37 | assert_equal(t.add_line(width), ans) 38 | 39 | 40 | def test_gen_reg_entries(): 41 | 42 | varname = 'Income' 43 | coef = 0.5 44 | se = 0.25 45 | ans1 = 'Income' 46 | ans2 = 0.5 47 | ans3 = 0.25 48 | ans4 = 2 49 | ans5 = 0.045500 50 | ans6 = 0.01 51 | ans7 = 0.99 52 | 53 | v, c, s, z, p, lw, up = t.gen_reg_entries(varname, coef, se) 54 | assert_equal(v, ans1) 55 | assert_equal(c, ans2) 56 | assert_equal(s, ans3) 57 | assert_equal(z, ans4) 58 | assert np.allclose(p, ans5) 59 | assert np.allclose(lw, ans6) 60 | assert np.allclose(up, ans7) 61 | 62 | -------------------------------------------------------------------------------- /tests/test_weighting.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import numpy as np 3 | 4 | import causalinference.estimators.weighting as w 5 | import causalinference.core.data as d 6 | 7 | 8 | def test_calc_weights(): 9 | 10 | pscore = np.array([0.1, 0.25, 0.5, 0.75, 0.9]) 11 | D = np.array([0, 1, 0, 1, 0]) 12 | 13 | ans = np.array([1.11111, 4, 2, 1.33333, 10]) 14 | assert np.allclose(w.calc_weights(pscore, D), ans) 15 | 16 | 17 | def test_weigh_data(): 18 | 19 | Y = np.array([1, -2, 3, -5, 7]) 20 | D = np.array([0, 1, 0, 1, 0]) 21 | X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]) 22 | weights = np.array([1/0.9, 4, 2, 1/0.75, 10]) 23 | 24 | Y_ans = np.array([1.11111, -8, 6, -6.66667, 70]) 25 | Z_ans = np.array([[1.11111, 0, 1.11111, 2.22222], 26 | [4, 4, 12, 16], 27 | [2, 0, 10, 12], 28 | [1.33333, 1.33333, 9.33333, 10.66667], 29 | [10, 0, 90, 100]]) 30 | Y_out, Z_out = w.weigh_data(Y, D, X, weights) 31 | assert np.allclose(Y_out, Y_ans) 32 | assert np.allclose(Z_out, Z_ans) 33 | 34 | 35 | def test_weighting(): 36 | 37 | Y = np.array([1, -2, 3, -5, 7]) 38 | D = np.array([0, 1, 0, 1, 0]) 39 | X = np.array([3, 2, 3, 5, 5]) 40 | pscore = np.array([0.1, 0.25, 0.5, 0.75, 0.9]) 41 | data = d.Data(Y, D, X) 42 | data._dict['pscore'] = pscore 43 | 44 | weighting = w.Weighting(data) 45 | ate = -6.7963178 46 | ate_se = 2.8125913 47 | keys = {'ate', 'ate_se'} 48 | assert np.allclose(weighting['ate'], ate) 49 | assert np.allclose(weighting['ate_se'], ate_se) 50 | assert_equal(set(weighting.keys()), keys) 51 | 52 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def random_data(N=0, K=0, Y_cur=None, D_cur=None, X_cur=None): 5 | 6 | if X_cur is not None: 7 | N, K = X_cur.shape 8 | elif D_cur is not None: 9 | N = D_cur.shape[0] 10 | elif Y_cur is not None: 11 | N = Y_cur.shape[0] 12 | 13 | if N == 0 and K == 0: 14 | K = np.random.random_integers(1, 5) 15 | N = np.random.random_integers(4, 4*K) 16 | elif N != 0 and K == 0: 17 | K = np.random.random_integers(1, N-1) 18 | elif N == 0 and K != 0: 19 | N = np.random.random_integers(4, 4*K) 20 | 21 | data = [] 22 | if Y_cur is None: 23 | Y_data = np.random.rand(N) 24 | data.append(Y_data) 25 | if D_cur is None: 26 | D_data = 
np.random.random_integers(0, 1, N) 27 | # loop to ensure at least two subjects in each group 28 | while D_data.sum() <= 1 or D_data.sum() >= N-1: 29 | D_data = np.random.random_integers(0, 1, N) 30 | data.append(D_data) 31 | if X_cur is None: 32 | X_data = np.random.rand(N, K) 33 | data.append(X_data) 34 | 35 | if len(data) == 1: 36 | return data[0] 37 | else: 38 | return data 39 | 40 | --------------------------------------------------------------------------------