├── .gitignore ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── causality ├── __init__.py ├── analysis │ ├── README.md │ ├── __init__.py │ ├── dataframe.py │ └── img │ │ ├── continuous_zplot_kernel.png │ │ ├── continuous_zplot_linear.png │ │ ├── continuous_zplot_mlp.png │ │ ├── continuous_zplot_naive.png │ │ ├── continuous_zplot_random_forest.png │ │ ├── discrete_zplot.png │ │ ├── discrete_zplot_bootstrap.png │ │ ├── discrete_zplot_bootstrap_80CL.png │ │ ├── discrete_zplot_naive.png │ │ ├── zmean_results.png │ │ └── zmean_results_no_CI.png ├── estimation │ ├── README.md │ ├── __init__.py │ ├── adjustments.py │ ├── img │ │ ├── z1_support.png │ │ ├── z2_support.png │ │ └── z3_support.png │ ├── nonparametric.py │ └── parametric.py ├── inference │ ├── __init__.py │ ├── independence_tests │ │ └── __init__.py │ └── search │ │ └── __init__.py └── util.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py └── unit ├── __init__.py ├── data ├── X.csv ├── build_X.py └── discrete.csv ├── nonparametric.py ├── parametric.py ├── settings.py ├── test_IC.py └── test_cit.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | .coverage 4 | htmlcov/ -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | Copyright (c) 2018 Adam Kelleher 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in all 10 | copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | SOFTWARE. 18 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Causality 2 | 3 | This package contains tools for causal analysis using observational (rather than experimental) datasets. 4 | 5 | ## Installation 6 | 7 | Assuming you have pip installed, just run 8 | ``` 9 | pip install causality 10 | ``` 11 | 12 | ## [Causal Analysis](https://github.com/akelleh/causality/tree/master/causality/analysis) 13 | 14 | The simplest interface to this package is probably through the `CausalDataFrame` object in [`causality.analysis.CausalDataFrame`](https://github.com/akelleh/causality/blob/master/causality/analysis/dataframe.py#L8). 
This is just an extension of the `pandas.DataFrame` object, and so it inherits the same methods. 15 | 16 | The `CausalDataFrame` currently supports two kinds of causal analysis. First, it has a `CausalDataFrame.zmean` method. This method lets you control for a set of variables, `z`, when you're trying to estimate the effect of a discrete variable `x` on a continuous variable, `y`. It supports both returning the `y` estimates at each `x` value, as well as providing bootstrap error bars. For more details, check out the readme [here](https://github.com/akelleh/causality/tree/master/causality/analysis). 17 | 18 | The second kind of analysis supported is plotting to show the effect of discrete or continuous `x` on continuous `y` while controlling for `z`. You can do this with the `CausalDataFrame.zplot` method. For details, check out the readme [here](https://github.com/akelleh/causality/tree/master/causality/analysis). 19 | 20 | 21 | ## Measuring Causal Effects 22 | 23 | The [`causality.estimation`](https://github.com/akelleh/causality/tree/master/causality/estimation) module contains tools for estimating causal effects from observational and experimental data. Most tools are parametric, like `PropensityScoreMatching`, and can be found in `causality.estimation.parametric`. Other models are non-parametric, and rely on directly estimating densities and using the g-estimation approach. 24 | 25 | 26 | ## DAG Inference 27 | 28 | The `causality.inference` module will contain various algorithms for inferring causal DAGs. Currently (2016/01/23), the only algorithm implemented is the IC\* algorithm from Pearl (2000). It has decent test coverage, but feel free to write some more! I've left some stubs in `tests/unit/test\_IC.py`. 29 | 30 | To run a graph search on a dataset, you can use the algorithms like this (using IC\* as an example): 31 | 32 | ```python 33 | import numpy 34 | import pandas as pd 35 | 36 | from causality.inference.search import IC 37 | from causality.inference.independence_tests import RobustRegressionTest 38 | 39 | # generate some toy data: 40 | SIZE = 2000 41 | x1 = numpy.random.normal(size=SIZE) 42 | x2 = x1 + numpy.random.normal(size=SIZE) 43 | x3 = x1 + numpy.random.normal(size=SIZE) 44 | x4 = x2 + x3 + numpy.random.normal(size=SIZE) 45 | x5 = x4 + numpy.random.normal(size=SIZE) 46 | 47 | # load the data into a dataframe: 48 | X = pd.DataFrame({'x1' : x1, 'x2' : x2, 'x3' : x3, 'x4' : x4, 'x5' : x5}) 49 | 50 | # define the variable types: 'c' is 'continuous'. The variables defined here 51 | # are the ones the search is performed over -- NOT all the variables defined 52 | # in the data frame. 53 | variable_types = {'x1' : 'c', 'x2' : 'c', 'x3' : 'c', 'x4' : 'c', 'x5' : 'c'} 54 | 55 | # run the search 56 | ic_algorithm = IC(RobustRegressionTest) 57 | graph = ic_algorithm.search(X, variable_types) 58 | ``` 59 | 60 | Now, we have the inferred graph stored in `graph`. In this graph, each variable is a node (named from the DataFrame columns), and each edge represents statistical dependence between the nodes that can't be eliminated by conditioning on the variables specified for the search. If an edge can be oriented with the data available, the arrowhead is indicated in `'arrows'`. If the edge also satisfies the local criterion for genuine causation, then that directed edge will have `marked=True`.
If we print the edges from the result of our search, we can see which edges are oriented, and which satisfy the local criterion for genuine causation: 61 | ```python 62 | >>> graph.edges(data=True) 63 | [('x2', 'x1', {'arrows': [], 'marked': False}), 64 | ('x2', 'x4', {'arrows': ['x4'], 'marked': False}), 65 | ('x3', 'x1', {'arrows': [], 'marked': False}), 66 | ('x3', 'x4', {'arrows': ['x4'], 'marked': False}), 67 | ('x4', 'x5', {'arrows': ['x5'], 'marked': True})] 68 | ``` 69 | 70 | We can see the edges from `'x2'` to `'x4'`, `'x3'` to `'x4'`, and `'x4'` to `'x5'` are all oriented toward the second of each pair. Additionally, we see that the edge from `'x4'` to `'x5'` satisfies the local criterion for genuine causation. This matches the structure given in figure `2.3(d)` in Pearl (2000). 71 | 72 | 73 | ## Nonparametric Effects Estimation 74 | 75 | The `causality.estimation.nonparametric` module contains a tool for non-parametrically estimating a causal distribution from an observational data set. You can supply an "admissable set" of variables for controlling, and then measure either the causal effect distribution of an effect given the cause, or the expected value of the effect given the cause. 76 | 77 | I've recently added adjustment for direct causes, where you can estimate the causal effect of fixing a set of X variables on a set of Y variables by adjusting for the parents of X in your graph. Using the dataset above, you can run this like 78 | ```python 79 | from causality.estimation.adjustments import AdjustForDirectCauses 80 | from networkx import DiGraph 81 | 82 | g = DiGraph() 83 | 84 | g.add_nodes_from(['x1','x2','x3','x4', 'x5']) 85 | g.add_edges_from([('x1','x2'),('x1','x3'),('x2','x4'),('x3','x4')]) 86 | adjustment = AdjustForDirectCauses() 87 | ``` 88 | 89 | Then, you can see the set of variables being adjusted for by 90 | ```python 91 | >>> print(adjustment.admissable_set(g, ['x2'], ['x3'])) 92 | set(['x1']) 93 | ``` 94 | If we hadn't adjusted for `'x1'` we would have incorrectly found that `'x2'` had a causal effect on `'x3'` due to the confounding pathway `x2, x1, x3`. Adjustment for `'x1'` removes this bias. 95 | 96 | You can see the causal effect of intervention, `P(x3|do(x2))`, using the measured causal effect in `adjustment`, 97 | ```python 98 | >>> from causality.estimation.nonparametric import CausalEffect 99 | >>> admissable_set = adjustment.admissable_set(g,['x2'], ['x3']) 100 | >>> effect = CausalEffect(X, ['x2'], ['x3'], variable_types=variable_types, admissable_set=list(admissable_set)) 101 | >>> x = pd.DataFrame({'x2' : [0.], 'x3' : [0.]}) 102 | >>> effect.pdf(x) 103 | 0.268915603296 104 | ``` 105 | 106 | This is close to the correct value of `0.282` for a Gaussian with mean 0 and variance 2. If you adjust the value of `'x2'`, you'll find that the probability of `'x3'` doesn't change. This is untrue with just the conditional distribution, `P(x3|x2)`, since in this case, observation and intervention are not equivalent. 107 | 108 | ## Other Notes 109 | 110 | This repository is in its early phases. The run-time for the tests is long. Many optimizations will be made in the near future, including 111 | * Implement fast mutual information calculation, O( N log N ) 112 | * Speed up integrating out variables for controlling 113 | * Take a user-supplied graph, and find the set of admissable sets 114 | * Front-door criterion method for determining causal effects 115 | 116 | Pearl, Judea. _Causality_. Cambridge University Press, (2000).
117 | -------------------------------------------------------------------------------- /causality/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/__init__.py -------------------------------------------------------------------------------- /causality/analysis/README.md: -------------------------------------------------------------------------------- 1 | # Analysis 2 | 3 | This module contains tools for using Robins' G-formula and arbitrary machine learning estimators to estimate and plot causal effects. By "causal effect" we mean the distribution or conditional expectation of Y given X, controlling for an admissable set of covariates, Z, to make the effect identifiable. For a primer on choosing these Z variables, check out the article [here](https://medium.com/@akelleh/a-technical-primer-on-causality-181db2575e41). 4 | 5 | More intuitively, you want to estimate the effect of X on Y, but you know you need to control for some set of confounders, Z, to get the true effect. Otherwise, you expect there to be confounding bias. 6 | 7 | # The `CausalDataFrame` 8 | 9 | The `CausalDataFrame` is an extension of the `pandas.DataFrame`, so you can initialize it as you normally would initialize a `pandas.DataFrame`, e.g. 10 | 11 | ```python 12 | import numpy as np 13 | import pandas as pd 14 | import matplotlib.pyplot as pp 15 | from causality.analysis.dataframe import CausalDataFrame 16 | 17 | N = 1000 18 | 19 | z = np.random.normal(1., size=N) 20 | x = np.random.binomial(1, p=1./(1. + np.exp(-z/.1))) 21 | y = x + z + np.random.normal(size=N) 22 | 23 | # It's easy to create a data frame 24 | df = CausalDataFrame({'x': x, 'y': y, 'z': z}) 25 | ``` 26 | Here, we've created a dataset where `x` has a direct effect on `y`, but a third variable `z` has a direct effect on both `x` and `y`. The result will be that when `z` is higher, both `x` and `y` will tend to be higher due to the influence of `z`. This correlation is not due to the causal dependence between `x` and `y`, and is instead due to confounding by `z`. We can make a causal plot that controls for `z` so we can see the true dependence between `x` and `y` easily with the `CausalDataFrame`: 27 | 28 | 29 | ```python 30 | # and the interface to zplot is basically the same as the pandas.DataFrame.plot method! 31 | df.zplot(x='x', y='y', z_types={'z': 'c'}, z=['z'], kind='bar', bootstrap_samples=500); pp.ylabel("$E[Y|do(X=x)]$"); pp.show() 32 | 33 | ``` 34 | ![The causal estimate](./img/discrete_zplot.png) 35 | 36 | This `zplot` method passes args and kwargs to the `plot` method of the `pandas.DataFrame`, so you can pass it all of its usual formatting options. We'll give a more complete summary of all of its particular methods below. 37 | 38 | You can also still use all of the usual `DataFrame` methods, for example to get a naive plot for comparison. 39 | 40 | ```python 41 | df.groupby('x').mean().reset_index().plot(x='x', y='y', kind='bar'); pp.ylabel("$E[Y|X=x]$"); pp.show() 42 | ``` 43 | ![The naive estimate](./img/discrete_zplot_naive.png) 44 | 45 | The correct answer in this example is that if you intervene to set the value of `x` to `x=0`, you'll find (on average) `y=1`. If you set `x=1`, you'll find (on average) `y=2`. You can see the causal `zplot` method finds the correct answer, within the 95% confidence level. You can see the naive observational estimate has much lower `y` at `x=0`!
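To build intuition for what `zplot` and `zmean` are doing here, the sketch below performs the same kind of adjustment by hand: fit a model for `y` given `x` and `z`, then for each treatment value set `x` for every row while keeping each row's own `z`, and average the predictions. This is only an illustration (it reuses the `df` defined above and scikit-learn's `RandomForestRegressor`, the same default model type the `CausalDataFrame` uses), not the package's exact implementation, and it omits the bootstrap error bars.

```python
from sklearn.ensemble import RandomForestRegressor

# Fit an outcome model E[Y | X, Z] on the observed (confounded) data.
model = RandomForestRegressor()
model.fit(df[['x', 'z']], df['y'])

# Back-door / G-formula adjustment: intervene on x, keep the observed z's,
# and average the model's predictions over the empirical distribution of z.
for xi in (0, 1):
    counterfactual = df[['x', 'z']].copy()
    counterfactual['x'] = xi
    # expect values roughly near 1. and 2., the causal answer above,
    # unlike the naive group means
    print(xi, model.predict(counterfactual).mean())
```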
46 | 47 | ## The `CausalDataFrame.zplot` method 48 | 49 | If you have continuous `x`, you can instead use the `kind='line'` argument. Let's generate a similar data set to see how this works. This time, let's have two confounding variables. 50 | 51 | ```python 52 | N = 1000 53 | lower = -1 54 | upper = 1 55 | z1 = np.random.uniform(lower, upper, size=N) 56 | z2 = np.random.uniform(lower, upper, size=N) 57 | x = np.random.uniform(lower, upper, size=N) + (z1 + z2)/2. 58 | z = z1 + z2 59 | y = np.random.normal(size=N) - x + 2.* z 60 | X = CausalDataFrame({'x': x, 'y': y, 'z1': z1, 'z2': z2}) 61 | ``` 62 | We can see from this data generating process that the true relationship, holding the confounders `z` constant, is a negative relationship between `x` and `y` that is linear with a slope of `-1`. If we just plot the (confounded) data, we can't see any relationship at all: 63 | 64 | ```python 65 | X.plot(x='x', y='y', style='bo', alpha=0.2, kind='scatter') 66 | ``` 67 | ![The naive estimate](img/continuous_zplot_naive.png) 68 | 69 | We can control for the `z` variables, and recover the negative relationship! 70 | 71 | ```python 72 | X.zplot(x='x', y='y', z=['z1', 'z2'], z_types={'z1': 'c', 'z2': 'c'}, kind='line') 73 | ``` 74 | ![The random forest estimate](img/continuous_zplot_random_forest.png) 75 | 76 | Unfortunately, the relationship is very noisy. The model used by default to do the controlling is a random forest model. It won't be the best model for every problem, and doesn't work here as well as kernel regression. Those are the two types of models that are currently supported for automatic controlling. You can switch to kernel regression by specifying `model_type='kernel'`. 77 | 78 | ```python 79 | X.zplot(x='x', y='y', z=['z1', 'z2'], z_types={'z1': 'c', 'z2': 'c'}, kind='line', model_type='kernel') 80 | ``` 81 | ![The kernel estimate](img/continuous_zplot_kernel.png) 82 | 83 | You're free to use other models you define yourself, as well. The models can be fitted or not. If the model is not fitted, you should pass the model class through the `model` kwarg. 84 | 85 | ```python 86 | from sklearn.linear_model import LinearRegression 87 | 88 | treatment = 'x' 89 | outcome = 'y' 90 | confounders = ['z1', 'z2'] 91 | X.zplot(x='x', y='y', z=confounders, z_types={'z1': 'c', 'z2': 'c'}, kind='line', model=LinearRegression) 92 | ``` 93 | ![linear regression model results](./img/continuous_zplot_linear.png) 94 | 95 | 96 | If you'd like to pass an already-fitted model, for example as you might if you're fitting a complicated model like a neural network, you can pass it through the `fitted_model` kwarg. Here's a simple multi-layer perceptron, just to give an example. 97 | 98 | ```python 99 | from sklearn.neural_network import MLPRegressor 100 | model = MLPRegressor(hidden_layer_sizes=(128,128,128), max_iter=100, learning_rate_init=0.01) 101 | 102 | treatment = 'x' 103 | outcome = 'y' 104 | confounders = ['z1', 'z2'] 105 | model.fit(X[[treatment] + confounders], X[outcome]) 106 | # requirement: model.predict(X[[treatment] + confounders]) yields a numpy array of scalar predictions for y, dimension (n_samples,) 107 | X.zplot(x='x', y='y', z=confounders, z_types={'z1': 'c', 'z2': 'c'}, kind='line', fitted_model=model) 108 | ``` 109 | 110 | ![MLP model results](./img/continuous_zplot_mlp.png) 111 | 112 | With discrete variables, it's a little easier to bootstrap error bars. We have some kwargs available to give a little extra control over the bootstrapping process.
Note that we use the normal approximation for the bootstrap confidence intervals. Using the percentile approach tended to give overly narrow intervals. 113 | 114 | Returning to the discrete data example from before, 115 | ```python 116 | N = 1000 117 | 118 | z = np.random.normal(1., size=N) 119 | x = np.random.binomial(1, p=1./(1. + np.exp(-z/.1))) 120 | y = x + z + np.random.normal(size=N) 121 | 122 | # It's easy to create a data frame 123 | df = CausalDataFrame({'x': x, 'y': y, 'z': z}) 124 | 125 | # and the interface to zplot is basically the same as the pandas.DataFrame.plot method! 126 | df.zplot(x='x', y='y', z_types={'z': 'c'}, z=['z'], kind='bar', bootstrap_samples=500); pp.ylabel("$E[Y|do(X=x)]$"); pp.show() 127 | ``` 128 | ![discrete plot with bootstrap parameter](./img/discrete_zplot_bootstrap.png) 129 | 130 | The default number of samples for the bootstrap is 500 samples. Much less than that tended to give overly narrow intervals. I'd encourage you to test them yourself with a simulation if you're planning to vary this parameter! 131 | 132 | You can also adjust the confidence level for your error bars. The default is the 95% confidence level. As we decrease the confidence level, you'll see the error bars shrink. 133 | ``` 134 | df.zplot(x='x', y='y', z=['z'], z_types={'z': 'c'}, kind='bar', bootstrap_samples=500, confidence_level=0.80) 135 | ``` 136 | ![discrete plot with bootstrap parameter and 80%CL](./img/discrete_zplot_bootstrap_80CL.png) 137 | 138 | You might also like to just get the values and error bars out from these plots. You can do that for the discrete plot with the `zmean` method. 139 | 140 | ## The `CausalDataFrame.zmean` method 141 | 142 | The interface for the `zmean` method is exactly like the `zplot` method. You can pass models and bootstrap parameters in the same way. The return value is a new dataframe. 143 | 144 | ```python 145 | df.zmean(x='x', y='y', z=['z'], z_types={'z': 'c'}, bootstrap_samples=500, confidence_level=0.95) 146 | ``` 147 | ![zmean results](img/zmean_results.png) 148 | 149 | Leaving off the `bootstrap_samples` kwarg while specifying the `confidence_level` will cause `zmean` to default to `bootstrap_samples=500`. 
Leaving off both keyword arguments will result in no confidence intervals being given: 150 | ```python 151 | df.zmean(x='x', y='y', z=['z'], z_types={'z': 'c'}) 152 | ``` 153 | ![zmean results](img/zmean_results_no_CI.png) 154 | -------------------------------------------------------------------------------- /causality/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/__init__.py -------------------------------------------------------------------------------- /causality/analysis/dataframe.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import scipy.stats as ss 4 | from statsmodels.nonparametric.kernel_regression import KernelReg 5 | from sklearn.ensemble import RandomForestRegressor 6 | 7 | 8 | class CausalDataFrame(pd.DataFrame): 9 | def zmean(self, *args, **kwargs): 10 | if kwargs.get('confidence_level', None) and not kwargs.get('bootstrap_samples', 0): 11 | kwargs['bootstrap_samples'] = 500 12 | if not kwargs.get('bootstrap_samples', 0) and not kwargs.get('confidence_level', None): 13 | kwargs['bootstrap_samples'] = 1 14 | confidence_level = kwargs.get('confidence_level', 0.95) 15 | treatment = kwargs.get('x') 16 | outcome = kwargs.get('y') 17 | def f(self, df, *args, **kwargs): 18 | model, _ = self._get_model(*args, **kwargs) 19 | treatment = kwargs.get('x') 20 | confounders = kwargs.get('z', []) 21 | df[treatment] = kwargs['xi'] 22 | df['$E[Y|X=x,Z]$'] = model.predict(df[[treatment] + confounders]) 23 | yi = df.mean()['$E[Y|X=x,Z]$'] 24 | return yi 25 | _, arg_key = self._get_model(*args, **kwargs) 26 | if arg_key: 27 | del kwargs[arg_key] 28 | unique_x = self[kwargs.get('x')].unique() 29 | df = self.copy() 30 | xs = [] 31 | lowers = []; uppers = []; expecteds = [] 32 | z = ss.norm().ppf(1. - (1. - confidence_level) / 2.) 
33 | for xi in unique_x: 34 | kwargs['xi'] = xi 35 | yi = self._bootstrap_statistic(f, df, *args, **kwargs) 36 | exp = np.mean(yi) 37 | lower, upper = exp - z * np.std(yi), exp + z * np.std(yi)# 38 | lowers.append(lower); uppers.append(upper); expecteds.append(exp) 39 | xs.append(xi) 40 | kwargs['kind'] = 'bar' 41 | kwargs['yerr'] = zip(lowers, uppers) 42 | data = {treatment: xs, outcome: expecteds} 43 | if kwargs['bootstrap_samples'] > 1: 44 | data['{}_lower'.format(outcome)] = lowers 45 | data['{}_upper'.format(outcome)] = uppers 46 | for k in ['xi', 'z', 'bootstrap_samples', 'z_types', 'confidence_level']: 47 | if k in kwargs: 48 | del kwargs[k] 49 | df = pd.DataFrame(data) 50 | return df 51 | 52 | def zplot(self, *args, **kwargs): 53 | if kwargs.get('z', []): 54 | if kwargs.get('kind') == 'line': 55 | return self._line_zplot(*args, **kwargs) 56 | if kwargs.get('kind') == 'bar' or kwargs.get('kind') == 'mean': 57 | return self._bootstrapped_mean_zplot(*args, **kwargs) 58 | else: 59 | if 'z' in kwargs: 60 | del kwargs['z'] 61 | if 'z_types' in kwargs: 62 | del kwargs['z_types'] 63 | return self.plot(*args, **kwargs) 64 | 65 | def _line_zplot(self, *args, **kwargs): 66 | model, arg_key = self._get_model(*args, **kwargs) 67 | if arg_key: 68 | del kwargs[arg_key] 69 | treatment = kwargs.get('x') 70 | outcome = kwargs.get('y') 71 | confounders = kwargs.get('z', []) 72 | xs = [] 73 | ys = [] 74 | xmin, xmax = kwargs.get('xlim', (self[treatment].quantile(0.01), self[treatment].quantile(0.99))) 75 | for xi in np.arange(xmin, xmax, (xmax - xmin) / 100.): 76 | df = self.copy() 77 | df[treatment] = xi 78 | df['$E[Y|X=x,Z]$'] = model.predict(df[[treatment] + confounders]) 79 | yi = df.mean()['$E[Y|X=x,Z]$'] 80 | xs.append(xi) 81 | ys.append(yi) 82 | del kwargs['z'] 83 | if 'z_types' in kwargs: 84 | del kwargs['z_types'] 85 | df = pd.DataFrame({treatment: xs, outcome: ys}) 86 | return df.plot(*args, **kwargs) 87 | 88 | def _bootstrapped_mean_zplot(self, *args, **kwargs): 89 | df = self.zmean(*args, **kwargs) 90 | kwargs['kind'] = 'bar' 91 | if kwargs.get('bootstrap_samples', 0) > 1 or kwargs.get('confidence_level', None): 92 | df['{}_lower'.format(kwargs['y'])] = df[kwargs['y']] - df['{}_lower'.format(kwargs['y'])] 93 | df['{}_upper'.format(kwargs['y'])] = df['{}_upper'.format(kwargs['y'])] - df[kwargs['y']] 94 | kwargs['yerr'] = df[['{}_lower'.format(kwargs['y']), 95 | '{}_upper'.format(kwargs['y'])]].values.T 96 | for k in ['xi', 'z', 'bootstrap_samples', 'z_types', 'confidence_level']: 97 | if k in kwargs: 98 | del kwargs[k] 99 | return df.plot(*args, **kwargs) 100 | 101 | def _bootstrap_statistic(self, f, df, *args, **kwargs): 102 | samples = [] 103 | for _ in range(kwargs.get('bootstrap_samples')): 104 | df_s = df.sample(n=len(df), replace=True) 105 | samples.append(f(self, df_s, *args, **kwargs)) 106 | return samples 107 | 108 | def _get_model(self, *args, **kwargs): 109 | treatment = kwargs.get('x') 110 | outcome = kwargs.get('y') 111 | variable_types = kwargs.get('z_types', {}).copy() 112 | confounders = kwargs.get('z', []) 113 | variable_types[treatment] = 'c' 114 | 115 | if kwargs.get('model'): 116 | model = kwargs.get('model')() 117 | arg_key = 'model' 118 | model.fit(self[[treatment] + confounders], self[outcome]) 119 | elif kwargs.get('fitted_model'): 120 | model = kwargs.get('fitted_model') 121 | arg_key = 'fitted_model' 122 | elif kwargs.get('model_type', '') == 'kernel': 123 | model = KernelModelWrapper() 124 | arg_key = 'model_type' 125 | model.fit(self[[treatment] + confounders], 
self[outcome], variable_types=variable_types) 126 | else: 127 | model = RandomForestRegressor() 128 | model.fit(self[[treatment] + confounders], self[outcome]) 129 | arg_key = None 130 | return model, arg_key 131 | 132 | class KernelModelWrapper(object): 133 | def __init__(self): 134 | self.model = None 135 | self.variable_types = {} 136 | self.X_shape = None 137 | self.y_shape = None 138 | 139 | def fit(self, X, y, variable_types={}): 140 | self.X_shape = X.shape 141 | self.y_shape = y.shape 142 | if variable_types: 143 | variable_type_string = ''.join([variable_types[col] for col in X.columns]) 144 | self.model = KernelReg(y, X, variable_type_string, reg_type='ll') 145 | else: 146 | self.model = KernelReg(y, X, 'c' * X.shape[1], reg_type='ll') 147 | return self 148 | 149 | def predict(self, X): 150 | if X.shape != self.X_shape: 151 | raise Exception("Expected shape {}, received {}".format(self.X_shape, X.shape)) 152 | return self.model.fit(X)[0] 153 | -------------------------------------------------------------------------------- /causality/analysis/img/continuous_zplot_kernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/continuous_zplot_kernel.png -------------------------------------------------------------------------------- /causality/analysis/img/continuous_zplot_linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/continuous_zplot_linear.png -------------------------------------------------------------------------------- /causality/analysis/img/continuous_zplot_mlp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/continuous_zplot_mlp.png -------------------------------------------------------------------------------- /causality/analysis/img/continuous_zplot_naive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/continuous_zplot_naive.png -------------------------------------------------------------------------------- /causality/analysis/img/continuous_zplot_random_forest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/continuous_zplot_random_forest.png -------------------------------------------------------------------------------- /causality/analysis/img/discrete_zplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/discrete_zplot.png -------------------------------------------------------------------------------- /causality/analysis/img/discrete_zplot_bootstrap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/discrete_zplot_bootstrap.png -------------------------------------------------------------------------------- 
/causality/analysis/img/discrete_zplot_bootstrap_80CL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/discrete_zplot_bootstrap_80CL.png -------------------------------------------------------------------------------- /causality/analysis/img/discrete_zplot_naive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/discrete_zplot_naive.png -------------------------------------------------------------------------------- /causality/analysis/img/zmean_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/zmean_results.png -------------------------------------------------------------------------------- /causality/analysis/img/zmean_results_no_CI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/analysis/img/zmean_results_no_CI.png -------------------------------------------------------------------------------- /causality/estimation/README.md: -------------------------------------------------------------------------------- 1 | # causality.estimation 2 | 3 | This module is for causal effect estimation! When you run a randomized controlled experiment (e.g. an A/B test), you know that people in the test group are, on average, similar to people in the control group. For any given covariate, Z, you expect that the average of Z in each group is the same. 4 | 5 | When you only have observational data, you can't be sure that the group assignments are independent of other covariates. The worst case scenario is that the effect of the treatment is different between the test and the control group. Then, the treatment's effect on the test group no longer represents the average effect of the treatment over everyone. 6 | 7 | In a drug trial, for example, people might take the drug if they've taken it in the past and know it works, and might not take it if they've taken it before and found that it doesn't work. Then, you'll find that the drug is much more effective for people who normally take it (your observational test group) than people who don't normally take it. If you enacted a policy where everyone who gets sick gets the drug, then you'll find it much less effective on average than it would have appeared from your observational data: your controlled intervention now gives the treatment to people it has no effect on! 8 | 9 | Our goal, then, is to take observational data and be able to answer questions about controlled interventions. There are some excellent books on the subject if you're interested in all of the details of how these methods work, but this package's documentation will give high-level explanations with a focus on application.
Some excellent references for more depth are Morgan and Winship's [_Counterfactuals and Causal Inference_](https://www.amazon.com/Counterfactuals-Causal-Inference-Principles-Analytical/dp/1107694167), Hernan's [_Causal Inference_](https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/), Pearl's groundbreaking (but extremely difficult, and not application-focused) [_Causality_] (https://www.amazon.com/Causality-Reasoning-Inference-Judea-Pearl/dp/052189560X), or Imbens and Rubin's [_Causal Inference_](https://www.amazon.com/Causal-Inference-Statistics-Biomedical-Sciences/dp/0521885884/ref=sr_1_1?s=books&ie=UTF8&qid=1496343137&sr=1-1&keywords=imbens+and+rubin). 10 | 11 | There are some critical caveats to all of these approaches. First, if you don't know what variables to control for, you're often out of luck. This is true of all methods that rely on controlling. Other methods, like Instrumental Variables, or mechanism-based methods, get around this by instead making certain assumptions about the structure of the system you're studying. We'll make a note of which type of algorithm you're dealing with in the tutorial for that algorithm, but it should be relatively clear from the context. This distinction is a little artificial, since you can often do controlling alongside approaches that rely on structural assumptions. 12 | 13 | 14 | ## Sub-modules: 15 | ### parametric 16 | Most of the classic models you'd like to use are probably in this portion of the package. Currently, these include propensity score matching and difference-in-differences. 17 | 18 | #### PropensityScoreMatching 19 | 20 | Propensity score matching tries to attack the problem of dissimilar test and control groups directly. You have the option of making the test group more similar to the control group, or vice versa. When we're talking about similarity, we mean similar by some metric. In the case of propensity score matching, that metric is the "propensity score". The propensity score is the probability a unit is assigned to the treatment given a set of covariates, $$P(D|Z_1, Z_2, ..., Z_n)$$. We can use a specific example to make all of this concrete. We'll run through the example for a high-level explanation, and then go in-depth into the assumptions and caveats. 21 | 22 | ##### High-level Example 23 | 24 | Suppose we're in the publishing business, and we're interested in the effect of "the length of an article title" on "the click-through rate of the article" (the proportion of times when a link to an article is seen and also clicked). To make things really simple, we'll just consider "long" titles and "short" titles. We're interested in how much better a long title clicks than a short title. 25 | 26 | There's a big problem: we can't force our writers to make their titles a certain length. Even worse, we think that our better writers tend to write longer titles. Since they're better writers, their titles also tend to click better _independently from the effects of the length of the title on click-through rates_. This results in a correlation between title length and click-through rates, even if there is no causal effect! They are both caused by the author. 27 | 28 | In order to handle this, we can try to control for the effect of the author. There's a direct way to do this, by looking at the effect of title length on click-through rates for each author, and then averaging over authors. That way, each effect measurement controls for author, and you average the effect measurements together to get the total result. 
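As a rough illustration of that stratify-and-average approach (using the `author`, binary `title_length`, and `ctr` columns from the example dataframe `X` introduced just below; the helper `stratified_effect` is not part of this package):

```python
import pandas as pd

def stratified_effect(X):
    # Within each author, compare long titles (1) to short titles (0).
    per_author = (X.groupby(['author', 'title_length'])['ctr'].mean()
                   .unstack('title_length'))
    effects = per_author[1] - per_author[0]
    # Weight each author's effect by the share of the data they contribute,
    # then sum; authors with only one title length drop out as NaN.
    weights = X.groupby('author').size() / len(X)
    return (effects * weights).sum()
```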
This is easy to do when we only care about one variable, but usually we want to control for a lot more. Consider that the vertical (e.g. news, entertainment, etc.) the author writes for might also confound the effect (e.g. news headlines might be longer, but also more interesting and so clickier). The more variables there are to control for, the harder it is to find data for every possible combination of values. This is where propensity score matching really shines: if you're willing to assume a model for the propensity scores, then you can do this kind of controlling. In this package, we build in a logistic regression model. In general, you can use any model you like. 29 | 30 | In order to use this package, the simplest implementation assumes you have all of the relevant data in a pandas.DataFrame object, `X`. We'll have author names as strings in `X['author']`, title length as `0` for short, and `1` for long in `X['title_length']`, vertical in `X['vertical']`, and the outcome we're interested in, the click-through rate (CTR) in `X['ctr']`. 31 | 32 | Estimating the effect is as simple as 33 | ```python 34 | from causality.estimation.parametric import PropensityScoreMatching 35 | 36 | matcher = PropensityScoreMatching() 37 | matcher.estimate_ATE(X, 'title_length', 'ctr', {'author': 'u', 'vertical': 'u'}) 38 | ``` 39 | 40 | The first argument contains your data, the second is the name of the dataframe column with the "cause" (it must be binary for PSM, though there's a little flexibility in how you encode it; check the docstring for details), and the 3rd argument is the name of the outcome. The 4th argument is a dictionary that tells the algorithm what you'd like to control for. It needs to know whether your data is discrete or continuous, so the values of the dictionary are `'c'` for continuous, `'o'` for ordered and discrete, and `'u'` for unordered and discrete. 41 | 42 | The name `ATE` stands for "average treatment effect". It means the average benefit of the `1` state over the `0` state. 43 | 44 | Now, we'll do a more in-depth example which will involve examining whether a few assumptions we make with PSM are satisfied, and we'll see how to get confidence intervals. 45 | 46 | ##### Detailed Example 47 | 48 | Propensity score matching does a lot of work internally. It attempts to find treatment and control units who are similar to each other, so any differences in them can be attributed to the difference in treatment assignments. We're making a few assumptions here. The most critical is probably that we've controlled for all of the variables that say whether two units are "similar enough" to be matched together. There is a very technical criterion called the ["back-door criterion"](http://bayes.cs.ucla.edu/BOOK-2K/ch3-3.pdf) (BDC) that answers this question. It's impossible to check without doing an experiment. This is a common problem with using observational data. For this reason, most methods are really just "best guesses" of the true results. Generally, you hope that controlling for more things removes bias, but even this isn't guaranteed. 49 | 50 | There are, however, a few diagnostics that help you figure out whether you've done a good job matching. Once you've done the matching, the distribution of the Z's between the test and control should end up pretty similar. The easiest trick is probably to examine the average value of each Z between the test and control group, and make sure most of the difference is gone. If so, your matching is probably okay.
If not, you should play with the matching algorithm's parameters and try to do a better job. This works well in practice, but it has been noted that you [can actually increase imbalance using PSM](https://gking.harvard.edu/files/gking/files/psnot.pdf). What you really care about is that you have controlled for all the relevant variables, and that the propensity scores are balanced. These scores satisfy the BDC if the variables that generate them do, and if the model used to estimate them is correct. Thus, controlling for propensity scores, if your modeling assumptions are correct, is sufficient. 51 | 52 | Let's run through a quick example of propensity score matching to see how easy it can be! 53 | 54 | First, we need to generate a data set that has some bias, since we're dealing with observational data. This will simulate an observational data set where the treatment's effectiveness varies depending on some other variables, Z. These will also correlate with whether a unit is assigned to the treatment or control group. 55 | 56 | First, let's generate our Z variables. These are analogous to "vertical" and "author" from the simple example before. Here, we'll make them continuous. 57 | 58 | ```python 59 | import pandas as pd 60 | import numpy as np 61 | from causality.estimation.parametric import PropensityScoreMatching 62 | 63 | N = 10000 64 | z1 = np.random.normal(size=N) 65 | z2 = np.random.normal(size=N) 66 | z3 = np.random.normal(size=N) 67 | ``` 68 | 69 | Next, we want to define the variable that is analogous to "long" or "short" title. We want someone to be more likely to use a long title if the Z variables are higher, so we'll make the probability of `d=1` higher if any Z is higher, using a logistic function. 70 | 71 | ```python 72 | p_d = 1. / (1. + np.exp(-(z1 + z2 + z3)/4.)) 73 | d = np.random.binomial(1, p=p_d) 74 | ``` 75 | 76 | So people use long titles with a probability `p_d`. 77 | 78 | 79 | Next, we want to define our outcomes. We'll call these `y`. Before, these were just CTRs, so they were between 0 and 1. Now, they'll be real-valued. To make the effect of the treatment explicit, we'll define the outcome for each unit in the case that they're assigned to the `d=1` state, `y1`, or the `d=0` state, `y0`. These variables are called the "potential outcomes". They are the outcomes that are possible for each unit, depending on the `d` variable. `d` is often called the "treatment assignment," since the effect we're looking for is actually the result of an imaginary completely randomized experiment, where we randomized assigning some articles to having long titles, and others to having short titles. 80 | 81 | The `d=0` outcome will be normally distributed. This is the baseline success for each article title. The `d=1` state will be the `d=0` state plus a difference that depends on the `z` variables. 82 | 83 | ```python 84 | y0 = np.random.normal(size=N) 85 | y1 = y0 + z1 + z2 + z3 86 | ``` 87 | The difference between these is just `z1 + z2 + z3`. This is a weird effect. It says that if an article has a long title, then it will perform `z1 + z2 + z3` better than if the article has a short title, everything else held fixed. The weirdness here is the dependence on the Z variables: people with higher Z tend to write better long titles than people with lower Z.
If some of these Z variables represent the skill of the author, then we interpret this as "when a skillful author writes a longer title, it tends to perform better than when they write a short title by an amount that depends on the author's skill." 88 | 89 | Now, we just need to define the actual measured outcome. The `d` variable chooses whether each article has a long or short title, so it chooses between the `y0` and `y1` outcomes. We'll put it all together into a dataframe. 90 | 91 | ```python 92 | y = (d==1)*y1 + (d==0)*y0 93 | 94 | X = pd.DataFrame({'d': d, 'z1': z1, 'z2': z2, 'z3': z3, 'y': y, 'y0': y0, 'y1': y1, 'p': p_d}) 95 | ``` 96 | 97 | The variable `y0` is the value that `y` would take if the unit is in the control group. The variable `y1` is the value the unit would take if it were in the test group. A unit can only be in one group when you measure its outcome, so you can only measure `y = y0` or `y = y1` in practice. Normally, you can't observe the potential outcomes. The only reason we have them here is because we wrote the data-generating process. 98 | 99 | Notice that these Z variables determine both whether a unit will be assigned to the `d=0` or `d=1` state (the higher the `z`s are, the higher `p_d` is), and they also determine the size of the outcome (the difference is just the sum of the `z`s, so higher `z` means higher treatment effectiveness). This results in bias if you just use a naive estimate for the average treatment effectiveness: 100 | 101 | ```python 102 | > X[X['d'] == 1].mean()['y'] - X[X['d'] == 0].mean()['y'] 103 | 0.3648 104 | ``` 105 | Taking a look at the true average treatment effect, `(y1 - y0).mean()`, we can read off that it's just the average of `z1 + z2 + z3`. `z1 + z2 + z3` is the sum of three normal variables, so has mean zero. Thus, there is no average treatment effect! Our naive estimate of `0.36` is far from the true value. We can calculate the true value directly: 106 | 107 | ```python 108 | > (y1 - y0).mean() 109 | -0.0002 110 | ``` 111 | 112 | which is only different from zero due to sampling error. 113 | 114 | Since we can't measure these potential outcome variables, we want to use PropensityScoreMatching to control for the variables that cause the bias. We can do this very easily! 115 | ```python 116 | > matcher = PropensityScoreMatching() 117 | > matcher.estimate_ATE(X, 'd', 'y', {'z1': 'c', 'z2': 'c', 'z3': 'c'}) 118 | -0.00011 119 | ``` 120 | and so we get the right average treatment effect (within measurement error). If you pass the argument `bootstrap=True` to the `estimate_ATE` method, it will return a 95\% confidence interval (bootstrap estimate) for the ATE. Bootstrap methods tend to be [conservative](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4260115/). 121 | 122 | Here, we put in a dataframe, `X`, that contains a binary treatment assignment column, `'d'`, an outcome column, `'y'`, and a dictionary of variables to control for. The keys are the names of the columns to use for controlling, and the values are one of `('c', 'o', 'u')` corresponding to continuous, ordered discrete, or unordered discrete variables, respectively. 123 | 124 | When you pass these arguments, the method builds a logistic regression model using the control variables to predict treatment assignment. The probabilities of treatment assignment, a.k.a. propensity scores, are used to match the treatment and control units using nearest neighbors (with a heuristic to improve matching for discrete variables).
The matches are then used to calculate treatment effects on typical treated individuals, and typical control individuals, and then these effects are weighted and averaged to get the average treatment effect on the whole population. This should agree with the value (within sampling error) of `(y1 - y0).mean()`, which is what we were trying to calculate! 125 | 126 | There are a few critical assumptions to make sure your model gives a good estimate of the true average treatment effect (ATE). 127 | 128 | 1. You must control for common causes of treatment status and the outcome (more precisely, a minimal set of common causes satisfying "the back-door criterion", see Pearl's book [here](http://bayes.cs.ucla.edu/BOOK-2K/ch3-3.pdf)). This can be the biggest show-stopper, because you may not know what all the common causes are, and you may not have them measured even if you do know. 129 | 2. The propensity score model (by default, logistic regression) must be the right model for the propensity scores. Misspecification (e.g. non-linearity, or even a different link function) will lead to systematic error in propensity scores, which can hurt the quality of your matching. 130 | 3. The true propensity scores must be "probabilistic", and not deterministic. In other words, they must be strictly between 0 and 1 (and not equal to zero or one). 131 | 4. The test and control group must have the same support over the Z variables. If there are regions of Z where there are test units, but not control units, you can't estimate the average treatment effect, but might still be able to get a conditional average treatment effect. See Morgan and Winship's discussion in the book mentioned above for more details. 132 | 133 | ###### Checking the Common Support 134 | Assuming these are satisfied, we can at least check that our matching does what we're expecting. First, a requirement of PSM is that the covariates overlap. You should technically check this for the `N`-dimensional space of all `N` of your covariates, but we make it easy to check the `1-D` subspaces. Run 135 | 136 | ```python 137 | matcher.check_support(X, 'd', {'z1': 'c', 'z2': 'c', 'z3': 'c'}) 138 | ``` 139 | 140 | And you'll find the following plots 141 | 142 | ![z1 support](./img/z1_support.png) 143 | ![z2 support](./img/z2_support.png) 144 | ![z3 support](./img/z3_support.png) 145 | 146 | You can see visually that the distributions overlap well on the x-axis. Thus, the Z's (at least in 1-D) share a common support, and the assumption is satisfied. 147 | 148 | ###### Checking Covariate Balance 149 | 150 | If the matching we're doing does a good job of making the test and control groups "look like" each other in terms of the `Z` variables, then we should find that statistics of the `Z` variables between the test and control groups are the same. This isn't actually a requirement of PSM, but if the test and control are balanced on `Z`, then they should be balanced on the propensity scores. We have a handy tool for checking balance. 151 | 152 | ```python 153 | matcher.assess_balance(X, 'd', {'z1': 'c', 'z2': 'c', 'z3': 'c'}) 154 | ``` 155 | 156 | will return 157 | 158 | ```python 159 | {'z1': 0.2458132624378607, 160 | 'z2': 0.26803071286101415, 161 | 'z3': 0.22545847989783488} 162 | ``` 163 | 164 | so there is a fair amount of imbalance before matching. Next, we can get matched test and control groups.
First, we need to generate the propensity scores, 165 | 166 | ```python 167 | X = matcher.score(X, assignment='d', confounder_types={'z1': 'c', 'z2': 'c', 'z3': 'c'}) 168 | ``` 169 | 170 | so now we have a new column in `X` labelled `propensity score`. Now, we'll run the matching 171 | 172 | ```python 173 | treated, control = matcher.match(X, assignment='d') 174 | ``` 175 | 176 | and finally re-assess the balance after matching 177 | 178 | ```python 179 | matcher.assess_balance(pd.concat([treated, control]), 'd', {'z1': 'c', 'z2': 'c', 'z3': 'c'}) 180 | {'z1': 0.00031457811654961971, 181 | 'z2': 0.01274281423785816, 182 | 'z3': -0.01515794796420316} 183 | ``` 184 | 185 | Note that you can use this feature to assess balance on the propensity score after adding it to `X`, 186 | 187 | ```python 188 | matcher.assess_balance(X, 'd', {'z1': 'c', 'z2': 'c', 'z3': 'c', 'propensity score': 'c'}) 189 | {'propensity score': 0.44348102876997414, 190 | 'z1': 0.26127781471482076, 191 | 'z2': 0.2577923164800251, 192 | 'z3': 0.24351497330531932} 193 | 194 | matcher.assess_balance(pd.concat([treated, control]), 'd', {'z1': 'c', 'z2': 'c', 'z3': 'c', 'propensity score': 'c'}) 195 | {'propensity score': 0.00067420782959645405, 196 | 'z1': 4.3693151229817443e-05, 197 | 'z2': -0.0044512025748346248, 198 | 'z3': 0.006435102509766962} 199 | ``` 200 | 201 | so indeed we've done a good job of balancing the propensity scores between the groups. 202 | 203 | 204 | ### nonparametric 205 | Documentation in progress! 206 | 207 | ### adjustments 208 | 209 | Documentation in progress! -------------------------------------------------------------------------------- /causality/estimation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/estimation/__init__.py -------------------------------------------------------------------------------- /causality/estimation/adjustments.py: -------------------------------------------------------------------------------- 1 | from networkx.algorithms import is_directed_acyclic_graph 2 | 3 | class AdjustmentException(Exception): 4 | pass 5 | 6 | class AdjustForDirectCauses(object): 7 | def __init__(self): 8 | pass 9 | 10 | def find_predecessors(self, g, causes): 11 | predecessors = set() 12 | for cause in causes: 13 | predecessors = predecessors.union(g.predecessors(cause)) 14 | return predecessors - set(causes) 15 | 16 | def assumptions_satisfied(self, g, causes, effects, predecessors): 17 | if not is_directed_acyclic_graph(g): 18 | return False 19 | if not len(set(effects).intersection(set(causes).union(predecessors))) == 0: 20 | return False 21 | return True 22 | 23 | def admissable_set(self, g, causes, effects): 24 | predecessors = self.find_predecessors(g, causes) 25 | if not self.assumptions_satisfied(g, causes, effects, predecessors): 26 | raise AdjustmentException("Failed to satisfy adjustment assumptions") 27 | return predecessors 28 | -------------------------------------------------------------------------------- /causality/estimation/img/z1_support.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/estimation/img/z1_support.png -------------------------------------------------------------------------------- /causality/estimation/img/z2_support.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/estimation/img/z2_support.png -------------------------------------------------------------------------------- /causality/estimation/img/z3_support.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/estimation/img/z3_support.png -------------------------------------------------------------------------------- /causality/estimation/nonparametric.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional, KDEMultivariate, EstimatorSettings 3 | from statsmodels.nonparametric.kernel_regression import KernelReg 4 | import itertools 5 | from scipy.integrate import nquad 6 | from scipy import stats 7 | import numpy as np 8 | from networkx.algorithms import is_directed_acyclic_graph 9 | 10 | try: 11 | xrange 12 | except NameError: 13 | xrange = range 14 | 15 | class CausalEffect(object): 16 | def __init__(self, X, causes, effects, admissable_set=[], variable_types=None, expectation=False, density=True): 17 | """ 18 | We want to calculate the causal effect of X and Y through 19 | back-door adjustment, P(Y|do(X)) = Sum( P(Y|X,Z)P(Z), Z) 20 | for some admissable set of control variables, Z. First we 21 | calculate the conditional density P(Y|X,Z), then the density 22 | P(Z). We find the support of Z so we can properly sum over 23 | it later. variable_types are a dictionary with the column name 24 | pointing to an element of set(['o', 'u', 'c']), for 'ordered', 25 | 'unordered discrete', or 'continuous'. 
26 | """ 27 | conditional_density_vars = causes + admissable_set 28 | self.causes = causes 29 | self.effects = effects 30 | self.admissable_set = list(admissable_set) # uses a list internally; AdjustForDirectCauses.admissable_set returns a set 31 | self.conditional_density_vars = conditional_density_vars 32 | 33 | if len(X) > 300 or max(len(causes+admissable_set),len(effects+admissable_set)) >= 3: 34 | self.defaults=EstimatorSettings(n_jobs=4, efficient=True) 35 | else: 36 | self.defaults=EstimatorSettings(n_jobs=-1, efficient=False) 37 | 38 | if variable_types: 39 | self.variable_types = variable_types 40 | dep_type = [variable_types[var] for var in effects] 41 | indep_type = [variable_types[var] for var in conditional_density_vars] 42 | density_types = [variable_types[var] for var in admissable_set] 43 | else: 44 | self.variable_types = self.__infer_variable_types(X) 45 | 46 | if 'c' not in variable_types.values(): 47 | bw = 'cv_ml' 48 | else: 49 | bw = 'normal_reference' 50 | 51 | 52 | if admissable_set: 53 | self.density = KDEMultivariate(X[admissable_set], 54 | var_type=''.join(density_types), 55 | bw=bw, 56 | defaults=self.defaults) 57 | 58 | self.conditional_density = KDEMultivariateConditional(endog=X[effects], 59 | exog=X[conditional_density_vars], 60 | dep_type=''.join(dep_type), 61 | indep_type=''.join(indep_type), 62 | bw=bw, 63 | defaults=self.defaults) 64 | if expectation: 65 | self.conditional_expectation = KernelReg(X[effects].values, 66 | X[conditional_density_vars].values, 67 | ''.join(indep_type), 68 | bw='cv_ls') 69 | 70 | self.support = self.__get_support(X) 71 | 72 | self.discrete_variables = [ variable for variable, var_type in self.variable_types.items() if var_type in ['o', 'u']] 73 | self.discrete_Z = list(set(self.discrete_variables).intersection(set(admissable_set))) 74 | self.continuous_variables = [ variable for variable, var_type in self.variable_types.items() if var_type == 'c' ] 75 | self.continuous_Z = list(set(self.continuous_variables).intersection(set(admissable_set))) 76 | 77 | 78 | def __infer_variable_types(self,X): 79 | """ 80 | fill this in later. 81 | """ 82 | pass 83 | 84 | 85 | def __get_support(self, X): 86 | """ 87 | find the smallest cube around which the densities are supported, 88 | allowing a little flexibility for variables with larger bandwidths. 89 | """ 90 | data_support = { variable : (X[variable].min(), X[variable].max()) for variable in X.columns} 91 | variable_bandwidths = { variable : bw for variable, bw in zip(self.effects + self.conditional_density_vars, self.conditional_density.bw)} 92 | support = {} 93 | for variable in self.effects + self.conditional_density_vars: 94 | if self.variable_types[variable] == 'c': 95 | lower_support = data_support[variable][0] - 10. * variable_bandwidths[variable] 96 | upper_support = data_support[variable][1] + 10. 
* variable_bandwidths[variable] 97 | support[variable] = (lower_support, upper_support) 98 | else: 99 | support[variable] = data_support[variable] 100 | return support 101 | 102 | 103 | def integration_function(self,*args): 104 | # takes continuous z, discrete z, then x 105 | data = pd.DataFrame({ k : [v] for k, v in zip(self.continuous_Z + self.discrete_Z + self.causes + self.effects, args)}) 106 | conditional = self.conditional_density.pdf(exog_predict=data[self.conditional_density_vars].values[0], 107 | endog_predict=data[self.effects].values[0]) 108 | density = self.density.pdf(data_predict=data[self.admissable_set]) 109 | return conditional * density 110 | 111 | 112 | def expectation_integration_function(self, *args): 113 | data = pd.DataFrame({ k : [v] for k, v in zip(self.continuous_Z + self.discrete_Z + self.causes, args)}) 114 | conditional = self.conditional_expectation.fit(data_predict=data[self.conditional_density_vars].values)[0] 115 | density = self.density.pdf(data_predict=data[self.admissable_set]) 116 | return conditional * density 117 | 118 | 119 | def pdf(self, x): 120 | """ 121 | Currently, this does the whole sum/integral over the cube support of Z. 122 | We may be able to improve this by taking into account how the joint 123 | and conditionals factorize, and/or finding a more efficient support. 124 | 125 | This should be reasonably fast for |Z| <= 2 or 3, and small enough discrete 126 | variable cardinalities. It runs in O(n_1 n_2 ... n_k) in the cardinality of 127 | the discrete variables, |Z_1| = n_1, etc. It likewise runs in O(V^n) for n 128 | continuous Z variables. Factorizing the joint/conditional distributions in 129 | the sum could linearize the runtime. 130 | """ 131 | causal_effect = 0. 132 | x = x[self.causes + self.effects] 133 | if self.discrete_Z: 134 | discrete_variable_ranges = [ xrange(*(int(self.support[variable][0]), int(self.support[variable][1])+1)) for variable in self.discrete_Z] 135 | for z_vals in itertools.product(*discrete_variable_ranges): 136 | z_discrete = pd.DataFrame({k : [v] for k, v in zip(self.discrete_Z, z_vals)}) 137 | if self.continuous_Z: 138 | continuous_Z_ranges = [self.support[variable] for variable in self.continuous_Z] 139 | args = z_discrete.join(x).values[0] 140 | causal_effect += nquad(self.integration_function,continuous_Z_ranges,args=args)[0] 141 | else: 142 | z_discrete = z_discrete[self.admissable_set] 143 | exog_predictors = x.join(z_discrete)[self.conditional_density_vars] 144 | conditional = self.conditional_density.pdf(exog_predict=exog_predictors, 145 | endog_predict=x[self.effects]) 146 | density = self.density.pdf(data_predict=z_discrete) 147 | dc = conditional * density 148 | causal_effect += dc 149 | return causal_effect 150 | elif self.continuous_Z: 151 | continuous_Z_ranges = [self.support[var] for var in self.continuous_Z] 152 | causal_effect, error = nquad(self.integration_function,continuous_Z_ranges,args=tuple(x.values[0])) 153 | return causal_effect 154 | else: 155 | return self.conditional_density.pdf(exog_predict=x[self.causes],endog_predict=x[self.effects]) 156 | 157 | 158 | 159 | def expected_value( self, x): 160 | """ 161 | Currently, this does the whole sum/integral over the cube support of Z. 162 | We may be able to improve this by taking into account how the joint 163 | and conditionals factorize, and/or finding a more efficient support. 164 | 165 | This should be reasonably fast for |Z| <= 2 or 3, and small enough discrete 166 | variable cardinalities. It runs in O(n_1 n_2 ... 
n_k) in the cardinality of 167 | the discrete variables, |Z_1| = n_1, etc. It likewise runs in O(V^n) for n 168 | continuous Z variables. Factorizing the joint/conditional distributions in 169 | the sum could linearize the runtime. 170 | """ 171 | causal_effect = 0. 172 | x = x[self.causes] 173 | if self.discrete_Z: 174 | discrete_variable_ranges = [ xrange(*(int(self.support[variable][0]), int(self.support[variable][1])+1)) for variable in self.discrete_Z] 175 | for z_vals in itertools.product(*discrete_variable_ranges): 176 | z_discrete = pd.DataFrame({k : [v] for k, v in zip(self.discrete_Z, z_vals)}) 177 | if self.continuous_Z: 178 | continuous_Z_ranges = [self.support[variable] for variable in self.continuous_Z] 179 | args = z_discrete.join(x).values[0] 180 | causal_effect += nquad(self.expectation_integration_function,continuous_Z_ranges,args=args)[0] 181 | else: 182 | z_discrete = z_discrete[self.admissable_set] 183 | exog_predictors = x.join(z_discrete)[self.conditional_density_vars] 184 | causal_effect += self.conditional_expectation.fit(data_predict=exog_predictors.values)[0] * self.density.pdf(data_predict=z_discrete.values) 185 | return causal_effect 186 | elif self.continuous_Z: 187 | continuous_Z_ranges = [self.support[var] for var in self.continuous_Z] 188 | causal_effect, error = nquad(self.expectation_integration_function,continuous_Z_ranges,args=tuple(x.values[0])) 189 | return causal_effect 190 | else: 191 | return self.conditional_expectation.fit(data_predict=x[self.causes])[0] 192 | 193 | -------------------------------------------------------------------------------- /causality/estimation/parametric.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from statsmodels.regression.linear_model import OLS, WLS 3 | from statsmodels.robust.robust_linear_model import RLM 4 | from statsmodels.discrete.discrete_model import Logit 5 | from sklearn.neighbors import NearestNeighbors 6 | from causality.util import bootstrap_statistic 7 | import numpy as np 8 | import logging 9 | 10 | 11 | class DifferenceInDifferences(object): 12 | def __init__(self, robust=True): 13 | """ 14 | We will take a dataframe where each row is a user, 15 | and the columns are: 16 | (1) Assignment: 1 = test, 0 = control 17 | (2) Start: The value of the metric you're interested 18 | in at the start of the experiment. 19 | (3) End: The value of the metric you're interested in 20 | at the end of the experiment. 21 | """ 22 | if robust: 23 | self.model = RLM 24 | else: 25 | self.model = OLS 26 | 27 | def average_treatment_effect(self, X, start='Start', end='End', assignment='assignment'): 28 | test = X[X[assignment]==1][[start ,end]] 29 | control = X[X[assignment]==0][[start,end]] 30 | del X 31 | 32 | test_initial = test[start] 33 | test_final = test[end] 34 | control_initial = control[start] 35 | control_final = control[end] 36 | del test, control 37 | 38 | df = pd.DataFrame({'y' : test_initial, 39 | assignment : [1. for i in test_initial], 40 | 't' :[0. for i in test_initial] }) 41 | df = pd.concat([df, pd.DataFrame({'y' : test_final, 42 | assignment : [1. for i in test_final], 43 | 't' :[1. for i in test_final] })]) 44 | df = pd.concat([df, pd.DataFrame({'y' : control_initial, 45 | assignment : [0. for i in control_initial], 46 | 't' :[0. for i in control_initial] })]) 47 | 48 | df = pd.concat([df, pd.DataFrame({'y' : control_final, 49 | assignment : [0. for i in control_final], 50 | 't' :[1. 
for i in control_final] })]) 51 | del test_initial, test_final, control_initial, control_final 52 | df['did'] = df['t'] * df[assignment] 53 | df['intercept'] = 1. 54 | 55 | model = self.model(df['y'], df[['t', assignment,'did', 'intercept']]) 56 | result = model.fit() 57 | conf_int = result.conf_int().ix['did'] 58 | expected = result.params['did'] 59 | return conf_int[0], expected, conf_int[1] 60 | 61 | def test_parallel_trend(self, X, start='Start', end='End', assignment='assignment'): 62 | """ 63 | This will find the average treatment effect on 64 | a dataset before the experiment is run, to make 65 | sure that it is zero. This tests the assumption 66 | that the average treatment effect between the test 67 | and control groups when neither is treated is 0. 68 | 69 | The format for this dataset is the same as that 70 | for the real estimation task, except that the start 71 | time is some time before the experiment is run, and 72 | the end time is the starting point for the experiment. 73 | """ 74 | lower, exp, upper = self.average_treatment_effect(X,start=start, end=end, assignment=assignment) 75 | if lower <= 0 <= upper: 76 | return True 77 | return False 78 | 79 | class PropensityScoringModel(object): 80 | def __init__(self): 81 | # change the model if there are multiple matches per treated! 82 | self.propensity_score_model = None 83 | 84 | def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True, propensity_score_name='propensity score'): 85 | """ 86 | Fit a propensity score model using the data in X and the confounders listed in confounder_types. This adds 87 | the propensity scores to the dataframe, and returns the new dataframe. 88 | 89 | :param X: The data set, with (at least) an assignment, set of confounders, and an outcome 90 | :param assignment: A categorical variable (currently, 0 or 1) indicating test or control group, resp. 91 | :param outcome: The outcome of interest. Should be real-valued or ordinal. 92 | :param confounder_types: A dictionary of variable_name: variable_type pairs of strings, where 93 | variable_type is in {'c', 'o', 'd'}, for 'continuous', 'ordinal', and 'discrete'. 94 | :param store_model_fit: boolean, Whether to store the model as an attribute of the class, as 95 | self.propensity_score_model 96 | :param intercept: Whether to include an intercept in the logistic regression model 97 | :return: A new dataframe with the propensity scores included 98 | """ 99 | df = X[[assignment]].copy() 100 | regression_confounders = [] 101 | for confounder, var_type in confounder_types.items(): 102 | if var_type == 'o' or var_type == 'u': 103 | c_dummies = pd.get_dummies(X[[confounder]], prefix=confounder) 104 | if len(c_dummies.columns) == 1: 105 | df = pd.concat([df, c_dummies[c_dummies.columns]], axis=1) 106 | regression_confounders.extend(c_dummies.columns) 107 | else: 108 | df = pd.concat([df, c_dummies[c_dummies.columns[1:]]], axis=1) 109 | regression_confounders.extend(c_dummies.columns[1:]) 110 | else: 111 | regression_confounders.append(confounder) 112 | df.loc[:, confounder] = X[confounder].copy() 113 | df.loc[:, confounder] = X[confounder].copy() 114 | if intercept: 115 | df.loc[:, 'intercept'] = 1. 
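            # Note: statsmodels' Logit does not add a constant term automatically, so the
            # explicit 'intercept' column added above is appended to the regressors below.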
116 | regression_confounders.append('intercept') 117 | logit = Logit(df[assignment], df[regression_confounders]) 118 | model = logit.fit() 119 | if store_model_fit: 120 | self.propensity_score_model = model 121 | X.loc[:, propensity_score_name] = model.predict(df[regression_confounders]) 122 | return X 123 | 124 | class PropensityScoreMatching(PropensityScoringModel): 125 | def __init__(self): 126 | # change the model if there are multiple matches per treated! 127 | self.propensity_score_model = None 128 | 129 | def match(self, X, assignment='assignment', score='propensity score', n_neighbors=2, treated_value=1, 130 | control_value=0, match_to='treated'): 131 | """ 132 | For each unit, match n_neighbors units in the other group (test or control) with the closest propensity scores 133 | (matching with replacement). 134 | 135 | :param X: The data set in a pandas.DataFrame, with (at least) an assignment, set of confounders, and an outcome 136 | :param assignment: A categorical variable (currently, 1 or 0) indicating test or control group, resp. 137 | :param score: The name of the column in X containing the propensity scores. Default is 'propensity score' 138 | :param n_neighbors: The number of neighbors to match to each unit. 139 | :return: two pandas.DataFrames. the first contains the treated units, and the second contains the control units. 140 | """ 141 | X = X.reset_index() 142 | treated = X[X[assignment] == treated_value].copy() 143 | control = X[X[assignment] == control_value].copy() 144 | if match_to == 'treated': 145 | return self.get_control_matches(treated, control, score=score, n_neighbors=n_neighbors) 146 | elif match_to == 'control': 147 | return self.get_treated_matches(treated, control, score=score, n_neighbors=n_neighbors) 148 | else: 149 | treated, matched_control = self.get_control_matches(treated, control, score=score, n_neighbors=n_neighbors) 150 | matched_treated, control = self.get_treated_matches(treated, control, score=score, n_neighbors=n_neighbors) 151 | return pd.concat([treated, matched_treated]), pd.concat([control, matched_control]) 152 | 153 | def get_control_matches(self, treated, control, score='propensity score', n_neighbors=2): 154 | """ 155 | Given a group of treated and control units, return two dataframes with control matches to the treated units, and the original treated units. 156 | 157 | :param treated: a pandas.DataFrame of treated units 158 | :param control: a pandas.DataFrame of control units 159 | :param score: the name of the column in the treated and control dataframe containing the propensity scores 160 | :param n_neighbors: the number of control units to match to each treated unit 161 | :return: two dataframes. The first contains the original treated units, the second is the matched control units. 162 | """ 163 | neighbor_search = NearestNeighbors(metric='euclidean', n_neighbors=n_neighbors) 164 | neighbor_search.fit(control[[score]].values) 165 | treated.loc[:, 'matches'] = treated[score].apply(lambda x: self.get_matches(x, control, neighbor_search, score, n_neighbors)) 166 | join_data = [] 167 | for treatment_index, row in treated.iterrows(): 168 | matches = row['matches'].flatten() 169 | for match in matches: 170 | join_data.append({'treatment_index': treatment_index, 'control_index': match}) 171 | join_data = pd.DataFrame(join_data) 172 | matched_control = join_data.join(control, on='control_index') 173 | del treated['matches'] 174 | del matched_control['control_index'] 175 | treated.loc[:, 'weight'] = 1. 
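        # Each treated unit keeps weight 1, and each of its n_neighbors matched
        # controls gets weight 1/n_neighbors, so every match group carries equal
        # total weight in the downstream weighted effect estimate.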
176 | matched_control.loc[:, 'weight'] = 1. / float(n_neighbors) 177 | return treated, matched_control 178 | 179 | def get_treated_matches(self, treated, control, score='propensity score', n_neighbors=2): 180 | """ 181 | Given a group of treated and control units, return two dataframes with treatment matches to the control units, and the original control units. 182 | 183 | :param treated: a pandas.DataFrame of treated units 184 | :param control: a pandas.DataFrame of control units 185 | :param score: the name of the column in the treated and control dataframe containing the propensity scores 186 | :param n_neighbors: the number of treated units to match to each control unit 187 | :return: two dataframes. The first containes the matched units, the second is the original control dataframe. 188 | """ 189 | neighbor_search = NearestNeighbors(metric='euclidean', n_neighbors=n_neighbors) 190 | neighbor_search.fit(treated[[score]].values) 191 | control.loc[:, 'matches'] = control[score].apply(lambda x: self.get_matches(x, treated, neighbor_search, score, n_neighbors)) 192 | join_data = [] 193 | for control_index, row in control.iterrows(): 194 | matches = row['matches'].flatten() 195 | for match in matches: 196 | join_data.append({'control_index': control_index, 'treated_index': match}) 197 | join_data = pd.DataFrame(join_data) 198 | matched_treated = join_data.join(treated, on='treated_index') 199 | del control['matches'] 200 | del matched_treated['control_index'] 201 | matched_treated.loc[:, 'weight'] = 1. / float(n_neighbors) 202 | control.loc[:, 'weight'] = 1. 203 | return matched_treated, control 204 | 205 | 206 | def get_matches(self, score, potential_matches, knn, score_name, n_neighbors): 207 | """ 208 | Discrete covariates can result in many unit having exactly the same propensity score. Since we don't get random 209 | neighbors, we'd end up using the same units over and over again when matching. Instead, we should find all units 210 | within the same distance as the closest n units, and randomly select matches from those. 211 | 212 | :param score: The score of the unit we're matching 213 | :param potential_matches: the dataframe of units we might match. 214 | :param knn: the K nearest neighbors model, a trained sklearn NearestNeighbors model 215 | :param score_name: The dataframe column in the control df with the propensity scores 216 | :param n_neighbors: The number of matches we'd like 217 | :return: The indices of the matched units in the dataframe of potential matches. 218 | """ 219 | max_distance = max(knn.kneighbors([[score]])[0].flatten()) # max(knn.kneighbors(score)[0].flatten()) 220 | lower_score = score - max_distance 221 | upper_score = score + max_distance 222 | gt = potential_matches[potential_matches[score_name] >= lower_score] 223 | candidates = gt[gt[score_name] <= upper_score] 224 | if len(candidates) < n_neighbors: 225 | logging.warning("Insufficient matches found. Returning None.") 226 | return np.array([]) 227 | else: 228 | return candidates.sample(n_neighbors).index.values 229 | 230 | 231 | def estimate_treatments(self, treatments, matched_control, outcome): 232 | """ 233 | Find the average outcome of the matched control units for each treatment unit. Add it to the treatment dataframe 234 | as a new column called 'control outcome'. 235 | 236 | :param treatments: A dataframe containing at least an outcome, and a list of indices for matches (in the control 237 | dataframe). This should be generated as the output of the self.match method. 
238 | :param control: The dataframe containing the matches for the treatment dataframe. This should be generated as 239 | the output of the self.match method. 240 | :param outcome: A float or ordinal representing the outcome of interest. 241 | :return: The treatment dataframe with the matched control outcome for each unit in a new column, 242 | 'control outcome'. 243 | """ 244 | control_outcomes = matched_control.groupby('treatment_index').mean()[[outcome]] 245 | control_outcomes.loc[:, 'control outcome'] = control_outcomes[outcome] 246 | del control_outcomes[outcome] 247 | return treatments.join(control_outcomes) 248 | 249 | def estimate_ATT(self, X, assignment, outcome, confounder_types, n_neighbors=5, bootstrap=False): 250 | """ 251 | Estimate the average treatment effect for people who normally take the test assignment. Assumes a 1 for 252 | the test assignment, 0 for the control assignment. 253 | 254 | :param X: The data set, with (at least) an assignment, set of confounders, and an outcome 255 | :param assignment: A categorical variable (currently, 0 or 1) indicating test or control group, resp. 256 | :param outcome: The outcome of interest. Should be real-valued or ordinal. 257 | :param confounder_types: A dictionary of variable_name: variable_type pairs of strings, where 258 | variable_type is in {'c', 'o', 'd'}, for 'continuous', 'ordinal', and 'discrete'. 259 | :param n_neighbors: An integer for the number of neighbors to use with k-nearest-neighbor matching 260 | :return: a float representing the treatment effect on the treated 261 | """ 262 | df = self.score(X, confounder_types, assignment).copy() 263 | treatments, matched_control = self.match(df, assignment=assignment, score='propensity score', n_neighbors=n_neighbors) 264 | df = pd.concat([treatments, matched_control]) 265 | return self.get_weighted_effect_estimate(assignment, df, outcome, bootstrap=bootstrap)#estimate_ATT(df) 266 | 267 | def estimate_ATC(self, X, assignment, outcome, confounder_types, n_neighbors=5, bootstrap=False): 268 | """ 269 | Estimate the average treatment effect for people who normally take the control assignment. Assumes a 1 for 270 | the test assignment, 0 for the control assignment. 271 | 272 | :param X: The data set, with (at least) an assignment, set of confounders, and an outcome 273 | :param assignment: A categorical variable (currently, 0 or 1) indicating test or control group, resp. 274 | :param outcome: The outcome of interest. Should be real-valued or ordinal. 275 | :param confounder_types: A dictionary of variable_name: variable_type pairs of strings, where 276 | variable_type is in {'c', 'o', 'd'}, for 'continuous', 'ordinal', and 'discrete'. 277 | :param n_neighbors: An integer for the number of neighbors to use with k-nearest-neighbor matching 278 | :return: a float representing the treatment effect on the control 279 | """ 280 | df = self.score(X, confounder_types, assignment).copy() 281 | treatments, matched_control = self.match(df, assignment=assignment, score='propensity score', 282 | n_neighbors=n_neighbors, match_to='control') 283 | df = pd.concat([treatments, matched_control]) 284 | return self.get_weighted_effect_estimate(assignment, df, outcome, bootstrap=bootstrap) 285 | 286 | def estimate_ATE(self, X, assignment, outcome, confounder_types, score=None, n_neighbors=5, bootstrap=False): 287 | """ 288 | Find the Average Treatment Effect(ATE) on the population. 
An ATE can be estimated as a weighted average of the 289 | ATT and ATC, weighted by the proportion of the population who is treated or not, resp. Assumes a 1 for 290 | the test assignment, 0 for the control assignment. 291 | 292 | :param X: The data set, with (at least) an assignment, set of confounders, and an outcome 293 | :param assignment: A categorical variable (currently, 0 or 1) indicating test or control group, resp. 294 | :param outcome: The outcome of interest. Should be real-valued or ordinal. 295 | :param confounder_types: A dictionary of variable_name: variable_type pairs of strings, where 296 | variable_type is in {'c', 'o', 'd'}, for 'continuous', 'ordinal', and 'discrete'. 297 | :param score: the name of the column containing propensity scores 298 | :param n_neighbors: An integer for the number of neighbors to use with k-nearest-neighbor matching 299 | :return: a float representing the average treatment effect 300 | """ 301 | if not score: 302 | X = self.score(X, confounder_types, assignment) 303 | score = 'propensity score' 304 | treated, control = self.match(X, assignment=assignment, score=score, n_neighbors=n_neighbors, treated_value=1, 305 | control_value=0, match_to='all') 306 | return self.get_weighted_effect_estimate(assignment, pd.concat([treated, control]), outcome, bootstrap=bootstrap) 307 | 308 | 309 | def get_weighted_effect_estimate(self, assignment, df, outcome, bootstrap=False): 310 | def estimate(df): 311 | treated = df[df[assignment] == 1] 312 | control = df[df[assignment] == 0] 313 | treated_outcome = (treated[outcome]*treated['weight']).sum() / treated['weight'].sum() 314 | control_outcome = (control[outcome]*control['weight']).sum() / control['weight'].sum() 315 | return treated_outcome - control_outcome 316 | if bootstrap: 317 | return bootstrap_statistic(df, estimate) 318 | else: 319 | return estimate(df) 320 | 321 | 322 | def assess_balance(self, X, assignment, confounder_types): 323 | """ 324 | Given a data frame X, and a set of confounders, calculate the imbalance of the confounders over the (binary) 325 | treatment assignment. This makes a good optimization metric when choosing different regression models for 326 | the propensity score. 327 | 328 | :param X: The dataframe containing at least the assignment, the control variables, and the outcome variables. 329 | There's no need to turn the control variables into dummies -- that is handled automatically. 330 | :param assignment: The name of the column in the dataframe containing the binary treatment assignment. 331 | :param confounder_types: A dictionary containing the names of the columns in the dataframe holding the control 332 | variables, and the type of each of those variables ('c' = continuous, 'o' = ordinal, 'd' = discrete) 333 | :return: a dictionary containing the name of each control variable and the amount of imbalance on that variable. 
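        The imbalance reported for each confounder is a standardized difference in
        means: the difference between the treated and control means divided by the
        pooled standard deviation, summed over the absolute values of the dummy-level
        imbalances for non-continuous variables.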
334 | """ 335 | df = X.copy() 336 | imbalances = {} 337 | for confounder, confounder_type in confounder_types.items(): 338 | if confounder_type != 'c': 339 | confounder_dummies = pd.get_dummies(df[confounder], prefix=confounder) 340 | df.loc[:, confounder_dummies.columns] = confounder_dummies 341 | dummy_imbalances = [] 342 | for dummy in confounder_dummies.columns: 343 | dummy_imbalances.append(np.abs(self.calculate_imbalance(df, dummy, assignment))) 344 | imbalances[confounder] = sum(dummy_imbalances) 345 | else: 346 | imbalance = self.calculate_imbalance(df, confounder, assignment) 347 | imbalances[confounder] = imbalance 348 | return imbalances 349 | 350 | def calculate_imbalance(self, X, x, d): 351 | """ 352 | Calculate the balance metric to assess how unbalanced x is across the two levels of (binary) treatment assignment, 353 | d. 354 | 355 | :param X: The data containing the test and control populations 356 | :param x: The name of the confounding column. 357 | :param d: The name of the treatment assignment variable. 358 | :return: 359 | """ 360 | numerator = X[X[d] == 1].mean()[x] - X[X[d] == 0].mean()[x] 361 | denominator = np.sqrt((X[X[d] == 1].var()[x] + X[X[d] == 0].var()[x])/2.) 362 | return numerator / denominator 363 | 364 | def check_support(self, X, assignment, confounder_types=None): 365 | """ 366 | Check the 1-d support over all the confounders. You should check higher-dimensional supports yourself. 367 | This will plot the histograms of the test and control data, so you can visually assess the region 368 | of common support. 369 | :param X: You dataframe containing, minimally, the assignment and confounders. 370 | :param confounder_types: A dictionary where the keys are the names of the confounders, and the values are 371 | one of 'd', 'o', or 'c'. 
372 | :return: None 373 | """ 374 | import matplotlib.pyplot as pp 375 | test = X[X[assignment] == 1].copy() 376 | control = X[X[assignment] == 0].copy() 377 | 378 | for zi in confounder_types.keys(): 379 | test[zi].hist(bins=30, alpha=0.5, color='r') 380 | control[zi].hist(bins=30, alpha=0.5, color='b') 381 | pp.title('Test (red) and Control (blue) Support for {}'.format(zi)); 382 | pp.xlabel(zi) 383 | pp.ylabel('Count') 384 | pp.show() 385 | 386 | 387 | class InverseProbabilityWeightedLS(PropensityScoringModel): 388 | def __init__(self): 389 | self.propensity_score_model = None 390 | self.wls_model = None 391 | 392 | def estimate_effect(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score', 393 | additional_weight_column=None, weight_name='weights', ols_intercept='True', effect='ATE'): 394 | X = self.compute_weights(X, 395 | assignment, 396 | outcome, 397 | confounder_types, 398 | propensity_score_name=propensity_score_name, 399 | additional_weight_column=additional_weight_column, 400 | weight_name=weight_name, 401 | effect=effect) 402 | self.fit_WLS(X, assignment, outcome, confounder_types, weight_name=weight_name, intercept=ols_intercept) 403 | return self.wls_model.conf_int().transpose()[assignment][0], self.wls_model.params[assignment], self.wls_model.conf_int().transpose()[assignment][1] 404 | 405 | def estimate_ATE(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score', 406 | additional_weight_column=None, weight_name='weights', ols_intercept='True'): 407 | return self.estimate_effect(X, assignment, outcome, confounder_types, propensity_score_name='propensity score', 408 | additional_weight_column=None, weight_name='weights', ols_intercept='True', effect='ATE') 409 | 410 | def estimate_ATC(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score', 411 | additional_weight_column=None, weight_name='weights', ols_intercept='True'): 412 | return self.estimate_effect(X, assignment, outcome, confounder_types, propensity_score_name='propensity score', 413 | additional_weight_column=None, weight_name='weights', ols_intercept='True', effect='ATC') 414 | 415 | def estimate_ATT(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score', 416 | additional_weight_column=None, weight_name='weights', ols_intercept='True'): 417 | return self.estimate_effect(X, assignment, outcome, confounder_types, propensity_score_name='propensity score', 418 | additional_weight_column=None, weight_name='weights', ols_intercept='True', effect='ATT') 419 | 420 | def compute_weights(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score', 421 | additional_weight_column=None, weight_name='weights', effect='ATE'): 422 | X = self.score(X, 423 | confounder_types, 424 | assignment=assignment, 425 | store_model_fit=True, 426 | intercept=True, 427 | propensity_score_name=propensity_score_name) 428 | if effect == 'ATE': 429 | X.loc[:, weight_name] = (X[assignment] == 1) / X[propensity_score_name] + (X[assignment] == 0) / (1. - X[propensity_score_name]) 430 | elif effect == 'ATC': 431 | X.loc[:, weight_name] = (X[assignment] == 1) * (1. - X[propensity_score_name]) / X[propensity_score_name] + (X[assignment] == 0) * 1. 432 | elif effect == 'ATT': 433 | X.loc[:, weight_name] = (X[assignment] == 1) * 1. + (X[assignment] == 0) * X[propensity_score_name] / (1. 
- X[propensity_score_name]) 434 | else: 435 | raise Exception('Effect {} not recognized'.format(effect)) 436 | 437 | if additional_weight_column: 438 | X.loc[:, weight_name] = X[weight_name] * X[additional_weight_column] 439 | return X 440 | 441 | def fit_WLS(self, X, assignment, outcome, confounder_types, weight_name='weights', intercept='True'): 442 | df = X[[assignment, outcome]].copy() 443 | regression_confounders = [] 444 | for confounder, var_type in confounder_types.items(): 445 | if var_type == 'o' or var_type == 'u': 446 | c_dummies = pd.get_dummies(X[[confounder]], prefix=confounder) 447 | if len(c_dummies.columns) == 1: 448 | df = pd.concat([df, c_dummies[c_dummies.columns]], axis=1) 449 | regression_confounders.extend(c_dummies.columns) 450 | else: 451 | df = pd.concat([df, c_dummies[c_dummies.columns[1:]]], axis=1) 452 | regression_confounders.extend(c_dummies.columns[1:]) 453 | else: 454 | regression_confounders.append(confounder) 455 | df.loc[:, confounder] = X[confounder].copy() 456 | df.loc[:, confounder] = X[confounder].copy() 457 | if intercept: 458 | df.loc[:, 'intercept'] = 1. 459 | regression_confounders.append('intercept') 460 | model = WLS(df[outcome], df[[assignment] + regression_confounders], weights=X[weight_name]) 461 | result = model.fit() 462 | self.wls_model = result 463 | return result 464 | -------------------------------------------------------------------------------- /causality/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/causality/inference/__init__.py -------------------------------------------------------------------------------- /causality/inference/independence_tests/__init__.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import statsmodels.api as sm 4 | import scipy.stats 5 | import itertools 6 | from collections import Counter 7 | 8 | DEFAULT_BINS = 2 9 | 10 | class RobustRegressionTest(): 11 | def __init__(self, y, x, z, data, alpha): 12 | self.regression = sm.RLM(data[y], data[x+z]) 13 | self.result = self.regression.fit() 14 | self.coefficient = self.result.params[x][0] 15 | confidence_interval = self.result.conf_int(alpha=alpha/2.) 16 | self.upper = confidence_interval[1][x][0] 17 | self.lower = confidence_interval[0][x][0] 18 | 19 | def independent(self): 20 | if self.coefficient > 0.: 21 | if self.lower > 0.: 22 | return False 23 | else: 24 | return True 25 | else: 26 | if self.upper < 0.: 27 | return False 28 | else: 29 | return True 30 | 31 | class ChiSquaredTest(): 32 | def __init__(self, y, x, z, data, alpha): 33 | self.alpha = alpha 34 | self.total_chi2 = 0. 
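        # Chi-squared statistics and their degrees of freedom are accumulated over
        # every stratum of z (and every x/y pair), then combined into a single
        # p-value at the end of the loop.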
35 | self.total_dof = 0 36 | for xi, yi in itertools.product(x,y): 37 | tables = data[[xi]+[yi]+z].copy() 38 | groupby_key = list([zi for zi in z] + [xi]) 39 | tables = tables.join(pd.get_dummies(data[yi],prefix=yi)).groupby(groupby_key).sum() 40 | del tables[yi] 41 | 42 | z_values = {zi : data.groupby(zi).groups.keys() for zi in z} 43 | x_values = {xi : data.groupby(xi).groups.keys()} 44 | y_values = {yi : data.groupby(yi).groups.keys()} 45 | 46 | contingencies = itertools.product(*[z_values[zi] for zi in z]) 47 | 48 | for contingency in contingencies: 49 | contingency_table = tables.loc[contingency].values 50 | try: 51 | chi2, _, dof, _ = scipy.stats.chi2_contingency(contingency_table) 52 | except ValueError: 53 | raise Exception("Not enough data or entries with 0 present: Chi^2 Test not applicable.") 54 | self.total_dof += dof 55 | self.total_chi2 += chi2 56 | self.total_p = 1. - scipy.stats.chi2.cdf(self.total_chi2, self.total_dof) 57 | 58 | def independent(self): 59 | if self.total_p < self.alpha: 60 | return False 61 | else: 62 | return True 63 | 64 | 65 | class MutualInformationTest(): 66 | """ 67 | This is mostly from "Distribution of Mutual Information" by Marcus Hutter. This MVP implementation 68 | doesn't contain priors, but will soon be adjusted to include the priors for n_xy. 69 | 70 | It uses a very basic variance estimate on MI to get approximate confidence intervals 71 | on I(X,Y|Z=z) for each z, then basic error propagation (incorrectly assuming 0 covariance, i.e. 72 | Cov(I(X,Y|Z=z_i), I(X,Y|Z=z_j)) = 0. This second assumption results in an underestimate of the 73 | final confidence interval. 74 | """ 75 | def __init__(self, y, x, z, X, alpha, variable_types={}): 76 | self.I, self.dI = self.discrete_mutual_information(x, y, z, X) 77 | z = scipy.stats.norm.ppf(1.-alpha/2.) # one-sided 78 | self.dI = z*self.dI 79 | 80 | def independent(self): 81 | if self.I - self.dI > 0.: 82 | return False 83 | else: 84 | return True 85 | 86 | def discrete_mutual_information(self, x, y, z, X): 87 | n_z = Counter() 88 | for zi in X[z].values: 89 | n_z[tuple(zi)] += 1. 90 | N = sum(n_z.values()) 91 | conditional_informations = {} 92 | for zi, n_zi in n_z.items(): 93 | zi_subset = X.copy() 94 | for col, val in zip(z,zi): 95 | zi_subset = zi_subset[zi_subset[col] == val] 96 | conditional_informations[zi] = self.max_likelihood_information(x,y,zi_subset) 97 | I_ml = sum([(kz/N)*conditional_informations[zi][0] for zi, kz in n_z.items()]) 98 | dI_ml = np.sqrt(sum([((kz/N)*conditional_informations[zi][1])**2. for zi, kz in n_z.items()])) 99 | return I_ml, dI_ml 100 | 101 | def max_likelihood_information(self, x, y, X): 102 | """ 103 | This estimator appears to get very imprecise quickly as the dimensions and 104 | cardinality of x and y get larger. It works well for dimensions around 1, 105 | and cardinality around 5. Higher dimensions require lower cardinality. For 106 | further refinment, I'll have to see if using a prior for I(x,y) helps. 107 | """ 108 | n_x = Counter() 109 | n_y = Counter() 110 | n_xy = Counter() 111 | for xy in X[x+y].values: 112 | xi = xy[:len(x)] 113 | yi = xy[len(x):] 114 | n_x[tuple(xi)] += 1. 115 | n_y[tuple(yi)] += 1. 116 | n_xy[(tuple(xi),tuple(yi))] += 1. 117 | N = sum(n_x.values()) 118 | I_ml = sum([(k / N) * np.log(k * N / float(n_x[xi]*n_y[yi])) for (xi,yi), k in n_xy.items()]) 119 | K = sum([(k / N) * (np.log(k * N / float(n_x[xi]*n_y[yi])))**2. 
for (xi,yi), k in n_xy.items()]) 120 | return I_ml, np.sqrt((K - I_ml**2.)/(N + 1.)) 121 | 122 | -------------------------------------------------------------------------------- /causality/inference/search/__init__.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import itertools 3 | 4 | """ 5 | This is an implementation of the IC* (Inductive Causation with latent 6 | variables) algorithm as described in _Causality_ by Judea Pearl, 2000. 7 | """ 8 | 9 | try: 10 | xrange 11 | except NameError: 12 | xrange = range 13 | 14 | class SearchException(Exception): 15 | pass 16 | 17 | class IC(): 18 | def __init__(self, independence_test, alpha=0.05, k=None): 19 | self.independence_test = independence_test 20 | self.alpha = alpha 21 | self.separating_sets = None 22 | self._g = None 23 | self.max_k = k 24 | 25 | def search(self, data, variable_types): 26 | self._build_g(variable_types) 27 | self._find_skeleton(data, variable_types) 28 | self._orient_colliders() 29 | 30 | added_arrows = True 31 | while added_arrows: 32 | R1_added_arrows = self._apply_recursion_rule_1() 33 | R2_added_arrows = self._apply_recursion_rule_2() 34 | added_arrows = R1_added_arrows or R2_added_arrows 35 | 36 | return self._g 37 | 38 | def _build_g(self, variable_types): 39 | """ 40 | This initializes a complete graph over the variables. We'll run 41 | independence tests on the complete graph to cut edges by trying to 42 | find separating sets. 43 | """ 44 | self._g = nx.Graph() 45 | self._g.add_nodes_from(variable_types.keys()) 46 | for var, var_type in variable_types.items(): 47 | self._g.nodes[var]['type'] = var_type 48 | edges_to_add = [] 49 | for (node_a, node_b) in itertools.combinations(self._g.nodes.keys(), 2): 50 | edges_to_add.append((node_a,node_b)) 51 | self._g.add_edges_from(edges_to_add, marked=False) 52 | 53 | def _apply_recursion_rule_1(self): 54 | added_arrows = False 55 | for c in self._g.nodes(): 56 | for (a,b) in itertools.combinations(self._g.neighbors(c), 2): 57 | if not self._g.has_edge(a,b): 58 | if c in self._g[a][c]['arrows'] and c not in self._g[b][c]['arrows'] and not (b in self._g[b][c]['arrows'] and self._g[b][c]['marked']): 59 | self._g[b][c]['arrows'].append(b) 60 | self._g[b][c]['marked'] = True 61 | added_arrows = True 62 | if c in self._g[b][c]['arrows'] and c not in self._g[a][c]['arrows'] and not (a in self._g[a][c]['arrows'] and self._g[a][c]['marked']): 63 | self._g[a][c]['arrows'].append(a) 64 | self._g[a][c]['marked'] = True 65 | added_arrows = True 66 | return added_arrows 67 | 68 | def _apply_recursion_rule_2(self): 69 | added_arrows = False 70 | for (a,b) in self._g.edges(): 71 | if b not in self._g[a][b]['arrows']: 72 | if self._marked_directed_path(a,b): 73 | self._g[a][b]['arrows'].append(b) 74 | added_arrows = True 75 | if a not in self._g[a][b]['arrows']: 76 | if self._marked_directed_path(b,a): 77 | self._g[a][b]['arrows'].append(a) 78 | added_arrows = True 79 | return added_arrows 80 | 81 | def _marked_directed_path(self,a,b): 82 | seen = [a] 83 | neighbors = [(a,neighbor) for neighbor in self._g.neighbors(a)] 84 | while neighbors: 85 | (parent, child) = neighbors.pop() 86 | if child in self._g[parent][child]['arrows'] and self._g[parent][child]['marked']: 87 | if child == b: 88 | return True 89 | if child not in seen: 90 | neighbors += [(child, neighbor) for neighbor in self._g.neighbors(child)] 91 | seen.append(child) 92 | return False 93 | 94 | 95 | def _orient_colliders(self): 96 | for v_i, v_j in 
self._g.edges(): 97 | self._g[v_i][v_j]['arrows'] = [] 98 | for v_c in self._g.nodes(): 99 | for (v_a,v_b) in itertools.combinations(self._g.neighbors(v_c), 2): 100 | if not self._g.has_edge(v_a,v_b): 101 | if v_c not in self.separating_set(v_a,v_b): 102 | self._g[v_a][v_c]['arrows'].append(v_c) 103 | self._g[v_b][v_c]['arrows'].append(v_c) 104 | 105 | def separating_set(self, xi, xj, data=None, variable_types=None): 106 | if not self.separating_sets and data and variable_types: 107 | if not self._g: 108 | self._build_g(variable_types) 109 | self._find_skeleton(data, variable_types) 110 | elif not self.separating_sets and not (data and variable_types): 111 | raise SearchException("Can't measure separating sets: Need data and var types.") 112 | if (xi,xj) in self.separating_sets: 113 | return self.separating_sets[(xi,xj)] 114 | elif (xj,xi) in self.separating_sets: 115 | return self.separating_sets[(xj,xi)] 116 | else: 117 | return False 118 | 119 | def _find_skeleton(self, data, variable_types): 120 | """ 121 | For each pair of nodes, run a conditional independence test over 122 | larger and larger conditioning sets to try to find a set that 123 | d-separates the pair. If such a set exists, cut the edge between 124 | the nodes. If not, keep the edge. 125 | """ 126 | self.separating_sets = {} 127 | if not self.max_k: 128 | self.max_k = len(self._g.nodes)+1 129 | for N in range(self.max_k + 1): 130 | for (x, y) in list(self._g.edges()): 131 | x_neighbors = list(self._g.neighbors(x)) 132 | y_neighbors = list(self._g.neighbors(y)) 133 | z_candidates = list(set(x_neighbors + y_neighbors) - set([x,y])) 134 | for z in itertools.combinations(z_candidates, N): 135 | test = self.independence_test([y], [x], list(z), 136 | data, self.alpha) 137 | if test.independent(): 138 | self._g.remove_edge(x,y) 139 | self.separating_sets[(x,y)] = z 140 | break 141 | -------------------------------------------------------------------------------- /causality/util.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def bootstrap_statistic(df, function, bootstrap_samples=1000, lower_confidence=0.025, upper_confidence=0.975, values=False): 4 | """ This gives bootstrap confidence intervals on the population value 5 | of function given the sample represented by iterable.""" 6 | statistics = [] 7 | for _ in range(bootstrap_samples): 8 | sampled_df = df.sample(n=len(df), replace=True) 9 | statistics.append(function(sampled_df)) 10 | samples = pd.Series(statistics) 11 | if values: 12 | return samples 13 | else: 14 | cis = samples.quantile([lower_confidence,upper_confidence]) 15 | lower_ci = cis[lower_confidence] 16 | expected = samples.mean() 17 | upper_ci = cis[upper_confidence] 18 | return lower_ci, expected, upper_ci -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | decorator>=4.1.2 2 | networkx>=2.0 3 | numpy>=1.13.3 4 | pandas>=0.20.3 5 | patsy>=0.4.1 6 | python-dateutil>=2.6.1 7 | pytz>=2017.2 8 | scipy>=0.19.1 9 | six>=1.11.0 10 | statsmodels>=0.8.0 11 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from codecs import open 3 | from os import path 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 7 | long_description = f.read() 8 | 9 | setup( 10 | name='causality', 11 | 12 | version='0.0.11', 13 | 14 | description='Tools for causal inference', 15 | long_description=long_description, 16 | 17 | url='http://github.com/akelleh/causality', 18 | 19 | author='Adam Kelleher', 20 | author_email='akelleh@gmail.com', 21 | 22 | license='MIT', 23 | 24 | classifiers=[ 25 | 'Development Status :: 3 - Alpha', 26 | 'Intended Audience :: Developers', 27 | 'Intended Audience :: Science/Research', 28 | 'Intended Audience :: Education', 29 | 'Topic :: Scientific/Engineering :: Mathematics', 30 | 'Topic :: Scientific/Engineering :: Information Analysis', 31 | 'Topic :: Scientific/Engineering :: Bio-Informatics', 32 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 33 | 'Topic :: Scientific/Engineering', 34 | 'License :: OSI Approved :: MIT License', 35 | 'Programming Language :: Python :: 2.7', 36 | ], 37 | 38 | keywords='causality data analysis inference causal graphs DAG propensity score matching', 39 | 40 | packages=find_packages(exclude=['tests']), 41 | 42 | install_requires=['numpy', 'scipy', 'pandas', 43 | 'statsmodels', 'networkx', 'patsy', 44 | 'pytz', 'python-dateutil', 'decorator', 45 | 'pytz', 'six'] 46 | ) 47 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akelleh/causality/e8a6afec34f10c49dee454b2135c23d4f737fac0/tests/__init__.py -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class TestAPI(unittest.TestCase): 4 | pass 5 | -------------------------------------------------------------------------------- /tests/unit/data/X.csv: -------------------------------------------------------------------------------- 1 | ,a,b,c,d 2 | 0,0.2533891593069615,265,258,1325.0 3 | 1,0.2450242714027675,237,264,1185.0 4 | 2,0.43529871344581905,458,408,2290.0 5 | 3,0.6069691416129691,605,626,3025.0 6 | 4,0.290566551340373,319,273,1595.0 7 | 5,0.41882133798612586,406,428,2030.0 8 | 6,0.7943663221685078,789,796,3945.0 9 | 7,0.2748876711709862,287,285,1435.0 10 | 8,0.16383138180058207,169,166,845.0 11 | 9,0.4917510057133061,470,496,2350.0 12 | 10,0.6833080458867128,690,673,3450.0 13 | 11,0.6384019305118127,657,636,3285.0 14 | 12,0.3051972532081103,312,301,1560.0 15 | 13,0.2831262484471249,279,289,1395.0 16 | 14,0.2740645620025809,264,268,1320.0 17 | 15,0.47107028975495213,503,480,2515.0 18 | 16,0.91233456094232,915,914,4575.0 19 | 17,0.3370807276427999,338,339,1690.0 20 | 18,0.7486438986092696,750,733,3750.0 21 | 19,0.7089435060867699,731,682,3655.0 22 | 20,0.7162268292514113,721,705,3605.0 23 | 21,0.3809564605007662,371,348,1855.0 24 | 22,0.2221709504220918,198,218,990.0 25 | 23,0.5402131898438298,534,565,2670.0 26 | 24,0.809137343551417,822,795,4110.0 27 | 25,0.4501869299474497,473,458,2365.0 28 | 26,0.6359339278760364,635,630,3175.0 29 | 27,0.038816637960617476,44,32,220.0 30 | 28,0.7344629130913919,742,735,3710.0 31 | 29,0.44084109181480147,436,456,2180.0 32 | 
30,0.6194422086048335,606,622,3030.0 33 | 31,0.6029931112846465,597,593,2985.0 34 | 32,0.6403971844272539,647,646,3235.0 35 | 33,0.2717465398506682,276,289,1380.0 36 | 34,0.24855416742869724,235,246,1175.0 37 | 35,0.5763308838124636,570,569,2850.0 38 | 36,0.7405450486741848,714,705,3570.0 39 | 37,0.6172525826732019,626,641,3130.0 40 | 38,0.700120802412764,701,694,3505.0 41 | 39,0.5423167896328214,563,522,2815.0 42 | 40,0.6277552712627879,654,629,3270.0 43 | 41,0.510041985093407,487,511,2435.0 44 | 42,0.4157466105913598,414,422,2070.0 45 | 43,0.4510121019261413,452,437,2260.0 46 | 44,0.7012283580250138,711,701,3555.0 47 | 45,0.7014667690970937,697,658,3485.0 48 | 46,0.6852081772462212,671,715,3355.0 49 | 47,0.5096763866022709,542,474,2710.0 50 | 48,0.335565507608182,308,342,1540.0 51 | 49,0.3200028644232836,332,311,1660.0 52 | 50,0.858032789858386,850,860,4250.0 53 | 51,0.9194383170042613,909,915,4545.0 54 | 52,0.7578568068409056,752,772,3760.0 55 | 53,0.5369302838745743,553,525,2765.0 56 | 54,0.39535897652586466,390,375,1950.0 57 | 55,0.43263401973770355,429,423,2145.0 58 | 56,0.39964255243330554,369,419,1845.0 59 | 57,0.4622587766434044,445,463,2225.0 60 | 58,0.41501790626033636,385,405,1925.0 61 | 59,0.4726188268262518,467,493,2335.0 62 | 60,0.5867317877398175,584,576,2920.0 63 | 61,0.40309700205544535,392,390,1960.0 64 | 62,0.9030694655323884,922,914,4610.0 65 | 63,0.7433849331215445,734,727,3670.0 66 | 64,0.3226815255161713,354,317,1770.0 67 | 65,0.7373262756586525,759,740,3795.0 68 | 66,0.454388112197214,454,442,2270.0 69 | 67,0.24695643159701502,223,233,1115.0 70 | 68,0.5150454373580923,494,511,2470.0 71 | 69,0.13432862371196186,132,132,660.0 72 | 70,0.8613390294928222,835,875,4175.0 73 | 71,0.2624561595426846,242,251,1210.0 74 | 72,0.4126349899124742,404,381,2020.0 75 | 73,0.32059071702516345,326,315,1630.0 76 | 74,0.5286389910014414,505,530,2525.0 77 | 75,0.4244970945991962,434,403,2170.0 78 | 76,0.7134622280095146,718,734,3590.0 79 | 77,0.3021712168767231,301,302,1505.0 80 | 78,0.5054400433377808,533,527,2665.0 81 | 79,0.35337777159842226,342,310,1710.0 82 | 80,0.856463811641551,850,856,4250.0 83 | 81,0.7690053536358458,769,786,3845.0 84 | 82,0.7705299469984472,764,759,3820.0 85 | 83,0.5495691694617971,545,563,2725.0 86 | 84,0.6599363584839685,668,633,3340.0 87 | 85,0.6729430373082935,692,673,3460.0 88 | 86,0.7200544362170541,710,743,3550.0 89 | 87,0.2763430048981494,258,276,1290.0 90 | 88,0.3958233748791622,389,379,1945.0 91 | 89,0.6500522067252924,638,674,3190.0 92 | 90,0.6985789916748841,733,704,3665.0 93 | 91,0.2805795813185738,276,274,1380.0 94 | 92,0.5576983868667601,550,509,2750.0 95 | 93,0.5478574942296501,527,553,2635.0 96 | 94,0.4546433910593641,470,472,2350.0 97 | 95,0.2152232150310978,222,217,1110.0 98 | 96,0.2312936402601318,230,228,1150.0 99 | 97,0.6131435277110764,611,624,3055.0 100 | 98,0.5657713175442409,570,574,2850.0 101 | 99,0.6302605033076863,626,649,3130.0 102 | -------------------------------------------------------------------------------- /tests/unit/data/build_X.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy.random as npr 3 | import matplotlib.pyplot as pp 4 | 5 | 6 | 7 | 8 | 9 | """ 10 | generate some toy data where a -> b, a-> c, b -> d, and c doesn't effect d. 11 | """ 12 | 13 | n = 100 14 | a = npr.beta(2.5, 2.5, n) 15 | b = npr.binomial( 1000, a) 16 | c = npr.binomial( 1000, a) 17 | d = 5. 
* b 18 | X = pd.DataFrame( { 'a' : a, 'b' : b, 'c' : c, 'd' : d}) 19 | X.to_csv('./tests/unit/data/X.csv') 20 | 21 | 22 | 23 | """ 24 | generate a toy discrete dataset with the same dependence structure 25 | as above 26 | """ 27 | 28 | n = 2000 29 | a = npr.binomial(1, 0.25, n) 30 | b = (a + npr.binomial(1, 0.75, n)) % 2 31 | c = (a + npr.binomial(1,0.25, n)) % 2 32 | d = (b + npr.binomial(1,0.75, n)) % 2 33 | X = pd.DataFrame( { 'a' : a, 'b' : b, 'c' : c, 'd' : d}) 34 | X.to_csv('./tests/unit/data/discrete.csv') 35 | -------------------------------------------------------------------------------- /tests/unit/data/discrete.csv: -------------------------------------------------------------------------------- 1 | ,a,b,c,d 2 | 0,0,0,0,1 3 | 1,0,1,0,0 4 | 2,0,0,0,0 5 | 3,0,1,0,0 6 | 4,0,0,1,1 7 | 5,1,0,1,0 8 | 6,0,0,1,1 9 | 7,0,1,0,1 10 | 8,0,1,0,0 11 | 9,0,1,0,0 12 | 10,0,1,0,1 13 | 11,0,1,0,0 14 | 12,0,0,1,1 15 | 13,0,1,0,1 16 | 14,1,1,1,0 17 | 15,0,1,1,0 18 | 16,0,0,1,1 19 | 17,1,1,1,0 20 | 18,0,1,0,0 21 | 19,1,1,1,0 22 | 20,1,0,1,0 23 | 21,0,1,1,0 24 | 22,0,1,0,0 25 | 23,0,1,0,1 26 | 24,0,0,0,1 27 | 25,1,0,0,1 28 | 26,1,0,1,1 29 | 27,0,1,0,0 30 | 28,0,1,0,1 31 | 29,0,1,0,0 32 | 30,0,1,0,0 33 | 31,1,0,1,1 34 | 32,0,1,0,0 35 | 33,0,1,1,0 36 | 34,0,1,0,0 37 | 35,0,1,0,0 38 | 36,1,1,1,0 39 | 37,0,1,0,0 40 | 38,0,0,0,1 41 | 39,0,1,0,0 42 | 40,0,1,0,0 43 | 41,0,1,1,0 44 | 42,0,1,0,0 45 | 43,0,1,0,1 46 | 44,0,0,0,1 47 | 45,0,1,0,1 48 | 46,0,1,0,0 49 | 47,1,0,1,1 50 | 48,0,1,1,0 51 | 49,1,1,1,1 52 | 50,0,0,0,1 53 | 51,0,0,0,1 54 | 52,0,1,0,0 55 | 53,0,1,0,1 56 | 54,0,0,0,1 57 | 55,0,1,0,0 58 | 56,0,0,1,1 59 | 57,0,0,0,1 60 | 58,1,0,1,0 61 | 59,1,0,1,1 62 | 60,1,0,1,1 63 | 61,0,0,0,0 64 | 62,0,1,0,0 65 | 63,0,0,0,1 66 | 64,0,1,0,0 67 | 65,0,1,1,0 68 | 66,0,1,0,0 69 | 67,0,1,0,1 70 | 68,1,0,0,0 71 | 69,1,0,1,1 72 | 70,0,1,0,0 73 | 71,0,0,1,1 74 | 72,0,1,1,0 75 | 73,0,0,1,1 76 | 74,0,1,0,0 77 | 75,1,0,0,1 78 | 76,0,1,0,0 79 | 77,0,0,1,1 80 | 78,0,1,0,0 81 | 79,0,1,1,0 82 | 80,0,1,0,0 83 | 81,1,0,1,1 84 | 82,0,1,0,1 85 | 83,0,1,0,0 86 | 84,0,0,0,0 87 | 85,1,0,0,1 88 | 86,1,0,0,1 89 | 87,1,0,0,1 90 | 88,0,1,1,0 91 | 89,0,0,1,1 92 | 90,0,1,0,0 93 | 91,0,1,1,1 94 | 92,1,1,1,1 95 | 93,0,1,1,0 96 | 94,0,0,1,1 97 | 95,0,0,0,0 98 | 96,0,1,1,0 99 | 97,1,0,1,1 100 | 98,0,1,1,0 101 | 99,1,0,1,0 102 | 100,0,1,0,0 103 | 101,1,0,0,1 104 | 102,1,1,1,1 105 | 103,0,1,0,1 106 | 104,1,0,1,0 107 | 105,0,1,1,0 108 | 106,0,1,1,0 109 | 107,1,0,1,1 110 | 108,0,1,0,0 111 | 109,0,0,0,0 112 | 110,1,0,1,0 113 | 111,0,1,0,1 114 | 112,0,1,0,0 115 | 113,0,1,0,0 116 | 114,1,0,1,1 117 | 115,0,0,0,0 118 | 116,0,0,0,1 119 | 117,1,0,0,1 120 | 118,0,1,0,0 121 | 119,0,1,0,0 122 | 120,0,1,0,1 123 | 121,1,0,1,1 124 | 122,0,0,0,0 125 | 123,1,0,1,1 126 | 124,0,1,0,0 127 | 125,0,1,0,1 128 | 126,0,0,0,1 129 | 127,0,1,1,0 130 | 128,1,0,1,0 131 | 129,0,0,0,1 132 | 130,0,1,1,0 133 | 131,1,0,1,1 134 | 132,1,0,1,0 135 | 133,0,0,0,0 136 | 134,0,1,0,0 137 | 135,0,0,0,1 138 | 136,0,1,1,0 139 | 137,0,1,0,0 140 | 138,0,1,0,0 141 | 139,1,0,1,0 142 | 140,1,1,1,0 143 | 141,1,1,0,0 144 | 142,1,0,0,0 145 | 143,1,0,0,1 146 | 144,1,0,1,1 147 | 145,0,1,0,0 148 | 146,1,0,0,1 149 | 147,1,0,0,1 150 | 148,1,0,0,1 151 | 149,0,1,0,0 152 | 150,0,0,0,0 153 | 151,0,1,0,0 154 | 152,1,0,1,0 155 | 153,0,0,0,1 156 | 154,0,1,0,0 157 | 155,0,1,0,0 158 | 156,1,0,1,0 159 | 157,0,0,1,1 160 | 158,0,1,1,0 161 | 159,0,1,0,0 162 | 160,0,1,0,0 163 | 161,0,1,0,1 164 | 162,0,1,1,0 165 | 163,1,0,1,0 166 | 164,0,1,0,0 167 | 165,0,1,0,0 168 | 166,0,1,0,0 169 | 167,0,1,0,0 170 | 
168,0,0,0,1 171 | 169,0,0,0,1 172 | 170,0,1,0,0 173 | 171,0,0,1,1 174 | 172,0,1,0,0 175 | 173,0,1,0,0 176 | 174,1,0,1,1 177 | 175,1,0,1,1 178 | 176,1,1,1,1 179 | 177,0,1,1,0 180 | 178,0,1,0,0 181 | 179,0,1,1,0 182 | 180,0,1,0,0 183 | 181,1,0,1,1 184 | 182,0,1,1,0 185 | 183,1,0,0,0 186 | 184,0,1,1,0 187 | 185,0,1,1,0 188 | 186,0,0,0,1 189 | 187,0,1,0,0 190 | 188,0,1,0,0 191 | 189,0,1,1,1 192 | 190,0,1,0,1 193 | 191,0,1,0,0 194 | 192,0,1,0,1 195 | 193,0,1,0,0 196 | 194,0,1,1,0 197 | 195,0,1,1,0 198 | 196,0,0,0,1 199 | 197,0,1,1,0 200 | 198,1,1,0,0 201 | 199,1,0,1,1 202 | 200,0,1,1,1 203 | 201,0,1,0,1 204 | 202,0,1,0,0 205 | 203,0,1,0,0 206 | 204,0,1,1,0 207 | 205,0,0,0,0 208 | 206,0,1,0,0 209 | 207,1,0,0,1 210 | 208,0,1,0,0 211 | 209,0,1,0,0 212 | 210,1,0,1,1 213 | 211,0,1,0,1 214 | 212,0,1,0,0 215 | 213,0,1,0,1 216 | 214,0,0,0,0 217 | 215,0,1,1,0 218 | 216,1,0,1,1 219 | 217,0,1,1,1 220 | 218,0,1,0,0 221 | 219,1,0,0,1 222 | 220,0,1,0,0 223 | 221,0,1,0,0 224 | 222,0,0,0,1 225 | 223,0,1,0,0 226 | 224,0,0,0,1 227 | 225,0,1,0,0 228 | 226,0,1,0,0 229 | 227,0,1,1,0 230 | 228,0,1,1,1 231 | 229,1,0,0,1 232 | 230,0,1,1,0 233 | 231,0,1,1,0 234 | 232,1,0,1,0 235 | 233,1,0,1,0 236 | 234,0,0,0,1 237 | 235,1,0,1,1 238 | 236,1,0,0,1 239 | 237,0,1,1,0 240 | 238,0,1,0,0 241 | 239,0,1,0,1 242 | 240,0,1,0,0 243 | 241,1,0,1,1 244 | 242,1,0,0,1 245 | 243,1,0,1,1 246 | 244,0,1,0,0 247 | 245,0,1,0,0 248 | 246,0,0,0,1 249 | 247,0,1,0,0 250 | 248,0,0,0,1 251 | 249,1,0,1,1 252 | 250,0,0,0,1 253 | 251,1,1,1,0 254 | 252,0,0,1,0 255 | 253,0,1,0,0 256 | 254,1,0,0,0 257 | 255,1,1,1,1 258 | 256,0,0,0,1 259 | 257,1,0,1,1 260 | 258,0,1,0,1 261 | 259,0,1,0,0 262 | 260,0,1,0,0 263 | 261,0,0,0,1 264 | 262,0,1,0,0 265 | 263,0,1,0,1 266 | 264,0,1,1,0 267 | 265,0,1,0,1 268 | 266,0,1,0,0 269 | 267,0,0,0,0 270 | 268,0,1,0,0 271 | 269,1,0,1,1 272 | 270,0,1,0,0 273 | 271,1,0,0,1 274 | 272,0,1,0,0 275 | 273,0,1,1,0 276 | 274,0,1,0,0 277 | 275,0,0,0,0 278 | 276,0,1,0,0 279 | 277,0,1,1,1 280 | 278,0,1,0,1 281 | 279,1,1,1,0 282 | 280,1,1,0,0 283 | 281,0,1,0,1 284 | 282,0,1,0,0 285 | 283,0,1,0,0 286 | 284,0,1,0,0 287 | 285,0,1,0,1 288 | 286,0,1,0,0 289 | 287,0,1,0,0 290 | 288,1,0,1,1 291 | 289,0,1,1,0 292 | 290,1,0,0,1 293 | 291,0,1,1,0 294 | 292,0,0,1,1 295 | 293,0,1,1,0 296 | 294,0,1,0,0 297 | 295,0,1,1,0 298 | 296,0,0,0,1 299 | 297,0,1,0,0 300 | 298,0,1,0,0 301 | 299,1,0,0,0 302 | 300,1,0,1,1 303 | 301,1,1,1,0 304 | 302,1,0,1,0 305 | 303,0,1,0,1 306 | 304,0,1,0,0 307 | 305,0,1,0,0 308 | 306,0,0,0,1 309 | 307,0,0,0,0 310 | 308,0,0,0,1 311 | 309,0,1,1,0 312 | 310,0,1,0,1 313 | 311,0,0,0,1 314 | 312,0,0,0,0 315 | 313,0,1,0,1 316 | 314,1,1,1,1 317 | 315,0,0,0,0 318 | 316,0,1,0,0 319 | 317,0,1,1,0 320 | 318,1,0,1,1 321 | 319,1,0,1,0 322 | 320,0,1,0,0 323 | 321,0,1,1,0 324 | 322,0,1,0,0 325 | 323,1,0,0,1 326 | 324,0,1,1,1 327 | 325,0,1,1,0 328 | 326,0,1,1,0 329 | 327,0,0,0,1 330 | 328,0,0,0,1 331 | 329,0,1,0,0 332 | 330,0,1,0,0 333 | 331,0,1,0,0 334 | 332,0,0,0,1 335 | 333,1,0,1,1 336 | 334,1,1,1,0 337 | 335,0,1,1,0 338 | 336,1,0,1,0 339 | 337,1,0,1,0 340 | 338,0,1,1,0 341 | 339,1,1,1,0 342 | 340,0,1,0,0 343 | 341,1,0,0,1 344 | 342,0,1,0,0 345 | 343,0,1,0,0 346 | 344,1,0,1,0 347 | 345,1,0,1,1 348 | 346,0,0,0,1 349 | 347,0,1,0,0 350 | 348,0,0,0,1 351 | 349,0,1,0,1 352 | 350,0,1,0,0 353 | 351,1,1,0,0 354 | 352,0,1,0,0 355 | 353,0,1,0,0 356 | 354,1,0,1,1 357 | 355,0,1,0,0 358 | 356,1,0,1,1 359 | 357,0,1,0,1 360 | 358,1,0,0,1 361 | 359,0,1,0,1 362 | 360,0,1,0,0 363 | 361,1,1,1,1 364 | 362,0,1,1,0 365 | 363,1,1,1,0 366 | 364,0,1,0,0 367 | 
365,1,0,0,1 368 | 366,0,1,0,0 369 | 367,0,0,0,1 370 | 368,0,1,0,0 371 | 369,0,1,1,0 372 | 370,0,1,0,0 373 | 371,0,1,0,0 374 | 372,0,1,0,0 375 | 373,0,0,0,1 376 | 374,0,1,0,0 377 | 375,0,1,0,0 378 | 376,1,0,1,0 379 | 377,0,0,0,1 380 | 378,0,1,0,0 381 | 379,1,0,0,1 382 | 380,1,0,1,1 383 | 381,0,0,1,1 384 | 382,1,0,1,1 385 | 383,0,1,0,0 386 | 384,1,0,1,1 387 | 385,0,1,0,0 388 | 386,1,0,1,0 389 | 387,0,1,1,0 390 | 388,0,1,0,0 391 | 389,0,1,0,0 392 | 390,0,0,1,1 393 | 391,0,1,0,0 394 | 392,0,0,0,0 395 | 393,1,0,1,1 396 | 394,1,1,1,1 397 | 395,0,1,0,0 398 | 396,0,0,1,1 399 | 397,1,1,1,0 400 | 398,1,1,1,0 401 | 399,0,1,1,0 402 | 400,0,1,0,1 403 | 401,0,0,0,1 404 | 402,0,1,0,0 405 | 403,0,0,0,1 406 | 404,0,0,0,1 407 | 405,0,0,0,1 408 | 406,0,1,0,0 409 | 407,0,0,0,1 410 | 408,0,1,1,0 411 | 409,0,1,0,0 412 | 410,0,1,1,1 413 | 411,0,0,0,1 414 | 412,0,1,0,0 415 | 413,0,1,0,1 416 | 414,0,1,0,0 417 | 415,1,1,0,0 418 | 416,0,1,0,0 419 | 417,0,1,0,0 420 | 418,0,1,1,0 421 | 419,0,1,0,1 422 | 420,1,0,1,1 423 | 421,0,1,1,0 424 | 422,0,1,0,0 425 | 423,0,1,0,1 426 | 424,1,0,1,1 427 | 425,0,0,0,1 428 | 426,0,1,1,1 429 | 427,0,1,0,0 430 | 428,0,1,0,0 431 | 429,1,0,1,0 432 | 430,0,1,0,0 433 | 431,0,1,0,0 434 | 432,0,0,0,0 435 | 433,0,0,1,1 436 | 434,0,0,0,1 437 | 435,1,0,1,1 438 | 436,0,1,0,0 439 | 437,0,0,0,1 440 | 438,0,0,1,1 441 | 439,0,1,0,0 442 | 440,1,1,0,0 443 | 441,0,1,0,1 444 | 442,0,1,0,0 445 | 443,0,1,0,0 446 | 444,0,1,0,0 447 | 445,0,1,1,0 448 | 446,0,1,0,0 449 | 447,0,1,1,1 450 | 448,0,1,0,1 451 | 449,0,0,0,1 452 | 450,0,1,0,0 453 | 451,0,1,0,0 454 | 452,0,1,0,1 455 | 453,0,1,0,0 456 | 454,1,1,1,0 457 | 455,0,1,1,0 458 | 456,0,0,0,1 459 | 457,0,1,0,0 460 | 458,0,1,1,0 461 | 459,0,1,0,0 462 | 460,0,1,0,0 463 | 461,1,0,1,0 464 | 462,1,0,1,0 465 | 463,0,1,0,1 466 | 464,1,0,1,1 467 | 465,0,1,0,0 468 | 466,1,0,0,0 469 | 467,0,1,1,0 470 | 468,0,1,0,0 471 | 469,0,1,1,0 472 | 470,1,0,1,1 473 | 471,0,0,1,0 474 | 472,0,0,0,0 475 | 473,1,0,1,0 476 | 474,0,1,1,0 477 | 475,1,0,1,0 478 | 476,0,1,1,1 479 | 477,0,0,0,0 480 | 478,1,0,0,1 481 | 479,0,1,0,0 482 | 480,0,1,0,1 483 | 481,0,1,0,0 484 | 482,0,0,0,1 485 | 483,0,1,0,1 486 | 484,0,1,0,0 487 | 485,0,1,0,1 488 | 486,0,1,0,0 489 | 487,0,1,0,0 490 | 488,1,0,1,0 491 | 489,0,1,0,0 492 | 490,0,1,0,0 493 | 491,0,1,0,0 494 | 492,0,0,0,1 495 | 493,0,0,0,1 496 | 494,0,1,0,1 497 | 495,0,1,1,1 498 | 496,0,0,0,1 499 | 497,0,0,0,0 500 | 498,0,1,0,0 501 | 499,0,1,1,1 502 | 500,1,0,1,1 503 | 501,1,1,0,0 504 | 502,1,0,1,1 505 | 503,0,1,1,0 506 | 504,0,1,0,0 507 | 505,0,0,0,1 508 | 506,0,0,0,1 509 | 507,0,0,0,1 510 | 508,0,0,0,1 511 | 509,0,0,0,1 512 | 510,0,1,1,0 513 | 511,0,1,0,0 514 | 512,0,0,0,1 515 | 513,1,1,1,0 516 | 514,0,1,0,0 517 | 515,0,0,0,1 518 | 516,0,0,0,1 519 | 517,1,0,1,0 520 | 518,0,1,0,0 521 | 519,0,0,1,1 522 | 520,0,1,0,0 523 | 521,0,1,0,1 524 | 522,0,1,0,0 525 | 523,0,1,0,1 526 | 524,1,0,1,1 527 | 525,0,0,1,1 528 | 526,1,0,1,1 529 | 527,0,1,1,0 530 | 528,1,1,1,1 531 | 529,0,1,1,0 532 | 530,0,0,1,1 533 | 531,1,0,1,0 534 | 532,0,1,0,1 535 | 533,1,0,1,1 536 | 534,0,1,1,0 537 | 535,0,1,0,0 538 | 536,1,0,1,1 539 | 537,0,0,0,1 540 | 538,0,1,1,0 541 | 539,0,1,1,0 542 | 540,0,1,0,0 543 | 541,1,0,1,1 544 | 542,0,0,0,1 545 | 543,1,0,1,1 546 | 544,1,0,1,1 547 | 545,0,1,0,1 548 | 546,0,1,0,0 549 | 547,0,0,0,0 550 | 548,0,0,1,1 551 | 549,0,1,1,0 552 | 550,0,1,0,0 553 | 551,1,1,1,0 554 | 552,0,0,0,1 555 | 553,0,1,0,0 556 | 554,0,1,0,1 557 | 555,0,1,0,0 558 | 556,0,1,0,0 559 | 557,1,0,1,1 560 | 558,0,1,0,0 561 | 559,0,1,0,0 562 | 560,0,0,0,1 563 | 561,1,1,1,0 564 | 
562,0,1,0,0 565 | 563,0,0,0,1 566 | 564,0,0,0,1 567 | 565,1,1,1,1 568 | 566,1,1,1,0 569 | 567,1,0,1,0 570 | 568,0,1,1,1 571 | 569,1,0,0,1 572 | 570,1,1,0,0 573 | 571,0,1,0,0 574 | 572,0,0,0,1 575 | 573,0,1,0,0 576 | 574,0,1,0,0 577 | 575,0,1,1,0 578 | 576,0,1,0,1 579 | 577,0,0,0,1 580 | 578,0,0,0,0 581 | 579,0,1,1,0 582 | 580,0,1,1,0 583 | 581,0,1,0,0 584 | 582,0,1,1,1 585 | 583,0,1,0,0 586 | 584,0,1,0,0 587 | 585,0,1,0,0 588 | 586,0,1,1,0 589 | 587,0,1,0,0 590 | 588,1,0,1,1 591 | 589,0,0,0,1 592 | 590,1,0,1,0 593 | 591,1,0,0,0 594 | 592,1,0,1,1 595 | 593,0,1,1,0 596 | 594,0,1,0,0 597 | 595,1,0,1,1 598 | 596,0,1,0,0 599 | 597,0,1,0,0 600 | 598,0,1,0,0 601 | 599,1,0,1,1 602 | 600,0,0,0,1 603 | 601,0,1,0,0 604 | 602,0,1,1,1 605 | 603,1,0,0,1 606 | 604,1,1,1,1 607 | 605,1,1,0,0 608 | 606,0,1,0,0 609 | 607,0,1,0,0 610 | 608,0,1,1,0 611 | 609,0,0,0,1 612 | 610,0,1,0,0 613 | 611,0,0,0,1 614 | 612,0,0,0,1 615 | 613,0,0,0,0 616 | 614,1,1,1,1 617 | 615,0,1,0,0 618 | 616,0,1,0,0 619 | 617,0,0,0,1 620 | 618,0,1,0,0 621 | 619,0,1,0,0 622 | 620,0,1,1,0 623 | 621,0,1,0,0 624 | 622,0,1,0,1 625 | 623,0,1,0,1 626 | 624,0,0,0,1 627 | 625,0,0,0,1 628 | 626,0,1,0,0 629 | 627,1,0,1,1 630 | 628,1,0,1,1 631 | 629,0,0,0,0 632 | 630,0,1,1,0 633 | 631,0,1,1,0 634 | 632,0,1,0,0 635 | 633,0,1,0,0 636 | 634,0,0,0,0 637 | 635,1,0,1,1 638 | 636,1,0,1,1 639 | 637,0,1,0,1 640 | 638,1,0,1,1 641 | 639,0,1,1,1 642 | 640,0,1,0,1 643 | 641,0,1,0,1 644 | 642,0,1,0,1 645 | 643,0,1,0,0 646 | 644,0,1,0,0 647 | 645,1,0,1,1 648 | 646,1,1,1,0 649 | 647,0,0,1,1 650 | 648,0,0,1,1 651 | 649,1,0,1,1 652 | 650,0,0,0,1 653 | 651,0,0,1,0 654 | 652,0,1,0,1 655 | 653,0,0,1,0 656 | 654,0,1,0,0 657 | 655,0,0,0,1 658 | 656,0,1,1,0 659 | 657,0,0,0,0 660 | 658,0,1,0,1 661 | 659,1,1,0,0 662 | 660,0,1,0,0 663 | 661,1,0,1,0 664 | 662,0,0,1,0 665 | 663,0,0,0,1 666 | 664,1,0,1,1 667 | 665,0,1,0,0 668 | 666,0,0,0,1 669 | 667,0,0,0,1 670 | 668,0,1,0,0 671 | 669,1,0,0,0 672 | 670,0,1,0,0 673 | 671,0,1,0,1 674 | 672,1,1,0,0 675 | 673,1,0,0,0 676 | 674,0,0,0,1 677 | 675,0,1,0,1 678 | 676,0,1,0,0 679 | 677,1,0,1,1 680 | 678,1,0,1,1 681 | 679,1,1,1,1 682 | 680,0,0,0,1 683 | 681,0,0,0,1 684 | 682,0,1,0,0 685 | 683,1,1,1,0 686 | 684,0,1,0,0 687 | 685,1,0,1,1 688 | 686,0,0,0,0 689 | 687,0,1,0,1 690 | 688,0,1,0,0 691 | 689,0,0,1,1 692 | 690,1,0,0,0 693 | 691,1,0,1,1 694 | 692,0,1,0,0 695 | 693,0,0,0,1 696 | 694,0,0,0,0 697 | 695,0,1,1,0 698 | 696,0,1,0,0 699 | 697,0,1,0,0 700 | 698,0,1,0,0 701 | 699,0,1,1,1 702 | 700,0,1,1,0 703 | 701,1,0,1,0 704 | 702,1,0,1,1 705 | 703,0,1,0,0 706 | 704,1,0,1,1 707 | 705,1,0,0,1 708 | 706,0,0,1,1 709 | 707,0,1,1,1 710 | 708,0,1,0,1 711 | 709,0,1,0,0 712 | 710,0,1,0,0 713 | 711,0,1,1,0 714 | 712,0,0,0,1 715 | 713,1,0,0,0 716 | 714,1,0,1,1 717 | 715,0,1,1,0 718 | 716,0,1,0,0 719 | 717,0,1,0,0 720 | 718,1,0,1,1 721 | 719,1,0,1,1 722 | 720,1,0,1,1 723 | 721,1,0,1,1 724 | 722,0,1,0,0 725 | 723,0,0,0,1 726 | 724,1,0,1,1 727 | 725,0,1,0,0 728 | 726,0,1,0,1 729 | 727,0,0,0,1 730 | 728,1,0,1,0 731 | 729,0,1,0,0 732 | 730,0,1,0,0 733 | 731,0,1,0,0 734 | 732,1,1,0,1 735 | 733,1,0,1,1 736 | 734,1,1,1,0 737 | 735,0,1,0,1 738 | 736,0,1,0,0 739 | 737,0,1,0,0 740 | 738,0,1,0,0 741 | 739,0,1,1,0 742 | 740,0,1,0,1 743 | 741,0,1,0,0 744 | 742,1,0,1,1 745 | 743,0,1,0,0 746 | 744,0,1,0,0 747 | 745,1,0,0,0 748 | 746,1,0,0,1 749 | 747,0,1,0,1 750 | 748,0,0,0,0 751 | 749,1,0,1,0 752 | 750,0,0,0,1 753 | 751,0,1,0,0 754 | 752,0,0,0,1 755 | 753,0,1,0,0 756 | 754,0,0,0,0 757 | 755,0,1,0,0 758 | 756,0,1,1,0 759 | 757,1,1,1,0 760 | 758,0,0,1,0 761 | 
759,1,0,0,0 762 | 760,0,0,1,1 763 | 761,1,0,1,1 764 | 762,0,1,0,0 765 | 763,0,0,0,1 766 | 764,0,1,1,0 767 | 765,1,0,1,1 768 | 766,0,1,1,1 769 | 767,0,1,0,0 770 | 768,0,1,0,0 771 | 769,0,1,0,1 772 | 770,1,0,0,0 773 | 771,0,0,1,1 774 | 772,1,0,1,1 775 | 773,0,0,0,1 776 | 774,0,0,0,1 777 | 775,0,1,0,0 778 | 776,1,0,1,1 779 | 777,0,1,0,1 780 | 778,0,1,1,1 781 | 779,0,0,0,1 782 | 780,0,0,1,0 783 | 781,0,1,1,0 784 | 782,1,0,1,1 785 | 783,0,0,0,0 786 | 784,0,1,0,0 787 | 785,0,1,1,1 788 | 786,1,0,1,1 789 | 787,0,1,1,0 790 | 788,0,1,0,0 791 | 789,0,1,0,0 792 | 790,1,0,1,1 793 | 791,0,1,0,0 794 | 792,0,1,0,0 795 | 793,0,0,0,1 796 | 794,0,0,0,0 797 | 795,0,1,0,0 798 | 796,0,1,0,0 799 | 797,0,0,0,1 800 | 798,0,0,1,1 801 | 799,0,1,0,0 802 | 800,0,1,1,0 803 | 801,0,1,1,0 804 | 802,0,1,0,0 805 | 803,0,0,1,1 806 | 804,0,0,1,1 807 | 805,0,1,0,1 808 | 806,1,0,0,1 809 | 807,0,1,0,0 810 | 808,0,1,0,1 811 | 809,0,0,0,1 812 | 810,1,1,1,0 813 | 811,1,0,0,1 814 | 812,1,0,1,1 815 | 813,0,1,0,0 816 | 814,0,1,0,0 817 | 815,0,0,0,1 818 | 816,0,1,0,0 819 | 817,1,0,1,1 820 | 818,0,1,0,0 821 | 819,1,1,1,1 822 | 820,0,0,0,0 823 | 821,0,1,0,1 824 | 822,0,1,1,0 825 | 823,0,1,0,0 826 | 824,0,0,1,1 827 | 825,1,1,1,0 828 | 826,0,1,0,0 829 | 827,0,1,0,0 830 | 828,0,0,0,1 831 | 829,0,1,1,0 832 | 830,0,1,0,0 833 | 831,1,0,1,1 834 | 832,1,0,1,1 835 | 833,1,0,1,1 836 | 834,0,1,0,0 837 | 835,0,1,0,1 838 | 836,0,1,0,0 839 | 837,0,1,1,0 840 | 838,0,1,0,0 841 | 839,0,1,0,0 842 | 840,0,1,0,0 843 | 841,1,0,1,0 844 | 842,0,1,0,0 845 | 843,1,0,1,1 846 | 844,0,1,0,0 847 | 845,0,1,0,1 848 | 846,0,1,1,0 849 | 847,0,1,1,0 850 | 848,0,1,0,0 851 | 849,0,0,0,1 852 | 850,0,1,0,1 853 | 851,0,0,0,1 854 | 852,0,1,0,0 855 | 853,0,0,0,1 856 | 854,1,1,1,0 857 | 855,1,0,1,1 858 | 856,1,0,0,1 859 | 857,0,1,1,0 860 | 858,1,0,1,1 861 | 859,0,1,0,0 862 | 860,1,1,1,1 863 | 861,0,0,1,0 864 | 862,0,1,1,0 865 | 863,1,0,1,0 866 | 864,0,1,0,0 867 | 865,0,1,0,0 868 | 866,1,0,1,1 869 | 867,0,0,0,1 870 | 868,0,0,0,0 871 | 869,0,1,1,0 872 | 870,1,0,1,1 873 | 871,0,0,0,1 874 | 872,1,0,1,0 875 | 873,0,1,0,0 876 | 874,0,1,1,1 877 | 875,0,1,1,0 878 | 876,0,1,0,1 879 | 877,1,0,1,1 880 | 878,0,1,1,0 881 | 879,0,0,0,1 882 | 880,1,0,1,1 883 | 881,0,0,1,1 884 | 882,0,0,0,1 885 | 883,0,0,0,1 886 | 884,0,1,0,0 887 | 885,0,1,0,1 888 | 886,0,1,0,0 889 | 887,0,1,1,1 890 | 888,0,1,0,0 891 | 889,0,0,1,1 892 | 890,0,1,0,1 893 | 891,0,1,0,0 894 | 892,1,0,1,1 895 | 893,0,1,0,0 896 | 894,0,1,0,1 897 | 895,0,1,0,0 898 | 896,0,1,0,1 899 | 897,1,0,1,1 900 | 898,0,1,1,0 901 | 899,1,0,0,1 902 | 900,1,0,0,0 903 | 901,1,0,1,1 904 | 902,0,0,1,1 905 | 903,0,0,1,1 906 | 904,0,0,0,1 907 | 905,0,1,0,1 908 | 906,1,0,1,1 909 | 907,1,0,0,1 910 | 908,1,0,0,1 911 | 909,0,0,0,1 912 | 910,0,0,1,0 913 | 911,0,0,0,1 914 | 912,0,0,0,1 915 | 913,0,1,0,0 916 | 914,0,0,0,1 917 | 915,0,0,0,1 918 | 916,0,1,1,1 919 | 917,1,0,1,0 920 | 918,1,0,1,1 921 | 919,0,0,0,1 922 | 920,0,0,0,1 923 | 921,1,0,1,0 924 | 922,0,1,0,1 925 | 923,0,1,0,0 926 | 924,1,0,1,1 927 | 925,0,0,0,0 928 | 926,0,1,0,1 929 | 927,0,1,1,0 930 | 928,1,0,1,1 931 | 929,1,0,1,1 932 | 930,0,1,1,0 933 | 931,0,0,0,0 934 | 932,0,1,0,1 935 | 933,0,1,0,0 936 | 934,0,1,0,1 937 | 935,0,1,1,0 938 | 936,1,1,1,1 939 | 937,0,1,0,0 940 | 938,0,1,1,0 941 | 939,0,0,0,0 942 | 940,0,1,0,1 943 | 941,0,1,0,0 944 | 942,0,1,0,0 945 | 943,0,0,0,1 946 | 944,0,1,0,1 947 | 945,0,0,0,0 948 | 946,1,0,0,1 949 | 947,0,1,0,0 950 | 948,0,1,0,0 951 | 949,1,0,1,0 952 | 950,0,1,0,0 953 | 951,1,0,1,0 954 | 952,0,1,0,1 955 | 953,0,1,0,0 956 | 954,0,1,0,0 957 | 955,0,1,0,0 958 | 
956,0,1,1,0 959 | 957,0,0,0,1 960 | 958,1,0,0,1 961 | 959,1,0,1,1 962 | 960,1,0,1,1 963 | 961,0,0,1,1 964 | 962,0,0,0,0 965 | 963,0,0,0,0 966 | 964,1,0,1,1 967 | 965,0,1,0,0 968 | 966,0,1,0,0 969 | 967,1,1,0,0 970 | 968,1,0,0,1 971 | 969,0,0,1,1 972 | 970,1,1,1,0 973 | 971,0,1,0,0 974 | 972,1,1,1,0 975 | 973,0,0,0,1 976 | 974,1,0,1,0 977 | 975,1,0,1,1 978 | 976,0,1,1,0 979 | 977,0,1,0,0 980 | 978,0,1,0,0 981 | 979,1,0,1,1 982 | 980,0,0,0,1 983 | 981,0,0,0,0 984 | 982,0,0,0,1 985 | 983,0,1,0,0 986 | 984,0,1,1,0 987 | 985,1,0,0,0 988 | 986,0,0,0,1 989 | 987,0,0,1,0 990 | 988,1,1,1,0 991 | 989,1,0,1,1 992 | 990,0,1,0,1 993 | 991,0,1,0,0 994 | 992,0,0,1,1 995 | 993,0,1,0,0 996 | 994,0,1,0,0 997 | 995,0,1,0,0 998 | 996,1,1,1,0 999 | 997,1,0,1,0 1000 | 998,0,1,0,0 1001 | 999,0,1,0,1 1002 | 1000,0,1,1,0 1003 | 1001,0,1,0,1 1004 | 1002,0,1,1,0 1005 | 1003,0,1,1,0 1006 | 1004,0,1,0,1 1007 | 1005,0,0,0,1 1008 | 1006,0,0,0,1 1009 | 1007,0,1,0,0 1010 | 1008,0,1,0,0 1011 | 1009,1,0,0,0 1012 | 1010,0,0,0,1 1013 | 1011,0,1,0,0 1014 | 1012,1,0,0,0 1015 | 1013,0,1,0,0 1016 | 1014,0,1,1,0 1017 | 1015,0,1,0,1 1018 | 1016,0,1,0,0 1019 | 1017,0,1,0,0 1020 | 1018,1,0,1,0 1021 | 1019,0,0,0,0 1022 | 1020,0,0,0,1 1023 | 1021,0,1,0,0 1024 | 1022,0,1,0,0 1025 | 1023,0,0,0,1 1026 | 1024,0,0,0,0 1027 | 1025,0,0,0,1 1028 | 1026,0,1,0,0 1029 | 1027,0,1,0,0 1030 | 1028,1,0,1,1 1031 | 1029,0,1,0,0 1032 | 1030,0,1,0,0 1033 | 1031,0,1,0,0 1034 | 1032,0,1,0,0 1035 | 1033,0,1,0,0 1036 | 1034,0,1,0,0 1037 | 1035,0,0,0,1 1038 | 1036,0,1,1,1 1039 | 1037,1,0,1,0 1040 | 1038,0,0,0,0 1041 | 1039,0,1,0,0 1042 | 1040,1,1,1,0 1043 | 1041,0,1,1,1 1044 | 1042,0,1,0,0 1045 | 1043,0,1,0,0 1046 | 1044,1,0,1,0 1047 | 1045,0,1,0,0 1048 | 1046,0,1,1,0 1049 | 1047,0,1,0,0 1050 | 1048,1,0,0,0 1051 | 1049,0,1,0,0 1052 | 1050,0,0,0,1 1053 | 1051,0,0,0,0 1054 | 1052,0,1,0,1 1055 | 1053,0,1,1,0 1056 | 1054,0,1,0,1 1057 | 1055,0,1,0,0 1058 | 1056,0,0,0,1 1059 | 1057,0,1,0,0 1060 | 1058,0,0,0,0 1061 | 1059,0,1,0,1 1062 | 1060,1,1,1,0 1063 | 1061,0,1,0,1 1064 | 1062,0,1,1,0 1065 | 1063,0,1,0,0 1066 | 1064,1,0,0,1 1067 | 1065,1,0,1,1 1068 | 1066,0,1,0,0 1069 | 1067,0,1,0,1 1070 | 1068,1,0,0,1 1071 | 1069,0,0,0,1 1072 | 1070,0,1,0,0 1073 | 1071,0,0,0,1 1074 | 1072,0,1,0,1 1075 | 1073,0,1,1,0 1076 | 1074,1,0,0,0 1077 | 1075,0,1,0,0 1078 | 1076,0,1,0,0 1079 | 1077,0,1,0,0 1080 | 1078,1,0,1,1 1081 | 1079,0,0,0,1 1082 | 1080,0,0,1,1 1083 | 1081,0,1,1,0 1084 | 1082,0,1,0,0 1085 | 1083,0,1,1,0 1086 | 1084,0,1,0,1 1087 | 1085,0,0,0,1 1088 | 1086,0,0,0,1 1089 | 1087,0,1,0,1 1090 | 1088,0,1,0,0 1091 | 1089,0,1,0,0 1092 | 1090,1,0,1,1 1093 | 1091,1,0,1,1 1094 | 1092,0,1,1,0 1095 | 1093,0,0,1,1 1096 | 1094,1,0,1,0 1097 | 1095,1,0,0,1 1098 | 1096,0,1,1,0 1099 | 1097,0,1,0,0 1100 | 1098,1,0,1,0 1101 | 1099,1,0,1,1 1102 | 1100,0,1,0,1 1103 | 1101,0,1,0,0 1104 | 1102,0,0,1,0 1105 | 1103,0,1,0,0 1106 | 1104,0,1,0,0 1107 | 1105,0,1,0,0 1108 | 1106,0,0,0,1 1109 | 1107,0,1,0,1 1110 | 1108,0,1,1,0 1111 | 1109,0,1,0,0 1112 | 1110,0,1,0,0 1113 | 1111,0,1,0,0 1114 | 1112,0,1,0,0 1115 | 1113,0,1,0,0 1116 | 1114,0,1,0,0 1117 | 1115,0,1,0,0 1118 | 1116,0,1,0,0 1119 | 1117,1,0,0,1 1120 | 1118,0,0,0,1 1121 | 1119,0,1,0,0 1122 | 1120,1,0,1,1 1123 | 1121,0,1,0,1 1124 | 1122,0,1,0,0 1125 | 1123,0,0,0,1 1126 | 1124,0,1,1,0 1127 | 1125,0,0,1,1 1128 | 1126,0,1,0,0 1129 | 1127,0,0,0,1 1130 | 1128,1,1,1,0 1131 | 1129,0,0,0,1 1132 | 1130,0,1,0,1 1133 | 1131,0,1,1,0 1134 | 1132,0,1,0,0 1135 | 1133,0,1,0,0 1136 | 1134,0,1,0,1 1137 | 1135,0,1,0,0 1138 | 1136,0,0,0,0 1139 | 1137,0,1,1,1 1140 | 
1138,0,1,0,0 1141 | 1139,1,1,1,1 1142 | 1140,0,1,1,1 1143 | 1141,1,0,0,1 1144 | 1142,0,1,0,0 1145 | 1143,0,1,0,0 1146 | 1144,0,0,1,1 1147 | 1145,0,1,0,0 1148 | 1146,0,1,0,0 1149 | 1147,0,1,0,0 1150 | 1148,0,1,1,1 1151 | 1149,0,1,0,0 1152 | 1150,0,1,0,0 1153 | 1151,0,1,1,0 1154 | 1152,0,1,0,0 1155 | 1153,0,1,0,1 1156 | 1154,0,0,1,1 1157 | 1155,0,1,0,0 1158 | 1156,1,0,1,1 1159 | 1157,0,1,0,1 1160 | 1158,0,1,0,0 1161 | 1159,1,1,0,1 1162 | 1160,0,0,0,0 1163 | 1161,1,1,1,0 1164 | 1162,1,0,1,1 1165 | 1163,0,1,0,0 1166 | 1164,0,1,0,0 1167 | 1165,0,0,0,1 1168 | 1166,0,1,1,0 1169 | 1167,0,1,0,0 1170 | 1168,1,0,1,1 1171 | 1169,0,1,0,0 1172 | 1170,0,1,0,0 1173 | 1171,1,0,1,1 1174 | 1172,1,0,0,1 1175 | 1173,0,1,0,0 1176 | 1174,0,0,0,0 1177 | 1175,0,1,0,0 1178 | 1176,1,0,1,0 1179 | 1177,0,1,1,0 1180 | 1178,0,1,0,1 1181 | 1179,0,1,0,0 1182 | 1180,1,0,1,1 1183 | 1181,0,1,1,0 1184 | 1182,1,0,1,0 1185 | 1183,0,0,0,1 1186 | 1184,0,0,0,1 1187 | 1185,1,0,1,1 1188 | 1186,0,1,0,0 1189 | 1187,0,1,0,0 1190 | 1188,0,1,0,0 1191 | 1189,1,1,1,0 1192 | 1190,0,1,0,0 1193 | 1191,0,1,0,1 1194 | 1192,0,1,0,0 1195 | 1193,0,0,0,1 1196 | 1194,1,1,1,0 1197 | 1195,0,1,0,0 1198 | 1196,0,1,0,0 1199 | 1197,1,0,1,0 1200 | 1198,0,1,1,0 1201 | 1199,0,0,0,1 1202 | 1200,0,1,1,0 1203 | 1201,0,1,0,0 1204 | 1202,0,1,0,0 1205 | 1203,1,1,0,0 1206 | 1204,0,0,0,1 1207 | 1205,0,1,0,0 1208 | 1206,0,1,1,0 1209 | 1207,0,0,0,1 1210 | 1208,0,0,0,1 1211 | 1209,0,1,1,0 1212 | 1210,1,1,0,0 1213 | 1211,1,0,0,0 1214 | 1212,1,0,1,1 1215 | 1213,0,1,0,0 1216 | 1214,0,1,0,0 1217 | 1215,0,1,1,0 1218 | 1216,0,1,0,0 1219 | 1217,0,1,0,0 1220 | 1218,0,1,0,1 1221 | 1219,1,1,1,1 1222 | 1220,0,0,0,1 1223 | 1221,1,1,1,0 1224 | 1222,0,1,0,0 1225 | 1223,1,0,1,1 1226 | 1224,0,1,1,0 1227 | 1225,0,0,0,1 1228 | 1226,0,1,0,0 1229 | 1227,0,0,1,0 1230 | 1228,0,1,0,0 1231 | 1229,1,0,1,1 1232 | 1230,0,1,0,1 1233 | 1231,0,1,0,0 1234 | 1232,0,1,0,0 1235 | 1233,0,1,1,0 1236 | 1234,0,1,1,0 1237 | 1235,0,1,0,0 1238 | 1236,0,1,0,0 1239 | 1237,0,1,0,1 1240 | 1238,0,0,0,1 1241 | 1239,0,1,0,1 1242 | 1240,0,1,0,0 1243 | 1241,0,1,0,0 1244 | 1242,0,1,0,0 1245 | 1243,1,0,1,1 1246 | 1244,0,1,0,1 1247 | 1245,1,0,0,1 1248 | 1246,0,0,0,1 1249 | 1247,0,1,0,0 1250 | 1248,0,1,0,0 1251 | 1249,1,0,0,1 1252 | 1250,0,1,0,1 1253 | 1251,1,0,1,1 1254 | 1252,1,1,1,1 1255 | 1253,0,1,0,0 1256 | 1254,1,0,1,1 1257 | 1255,1,1,1,0 1258 | 1256,1,0,1,1 1259 | 1257,0,1,0,0 1260 | 1258,0,1,1,0 1261 | 1259,0,1,0,1 1262 | 1260,0,1,0,0 1263 | 1261,0,1,0,0 1264 | 1262,0,0,1,1 1265 | 1263,1,0,1,1 1266 | 1264,0,0,0,1 1267 | 1265,0,0,0,1 1268 | 1266,0,1,1,0 1269 | 1267,0,0,1,1 1270 | 1268,0,1,0,0 1271 | 1269,0,1,0,0 1272 | 1270,0,1,0,0 1273 | 1271,0,0,0,1 1274 | 1272,1,1,1,0 1275 | 1273,0,1,0,0 1276 | 1274,0,0,1,1 1277 | 1275,0,1,0,1 1278 | 1276,0,1,0,0 1279 | 1277,0,1,1,0 1280 | 1278,0,1,1,1 1281 | 1279,0,1,0,0 1282 | 1280,0,1,1,1 1283 | 1281,1,0,1,1 1284 | 1282,1,0,1,1 1285 | 1283,1,0,0,1 1286 | 1284,0,1,0,0 1287 | 1285,0,0,0,1 1288 | 1286,0,1,0,0 1289 | 1287,0,1,1,0 1290 | 1288,0,0,0,1 1291 | 1289,1,0,0,1 1292 | 1290,1,1,0,0 1293 | 1291,1,1,1,0 1294 | 1292,0,1,0,0 1295 | 1293,1,0,1,1 1296 | 1294,0,1,0,0 1297 | 1295,0,1,0,0 1298 | 1296,0,0,0,1 1299 | 1297,1,0,0,1 1300 | 1298,0,1,0,0 1301 | 1299,1,0,1,0 1302 | 1300,0,1,0,0 1303 | 1301,0,0,0,1 1304 | 1302,0,0,0,1 1305 | 1303,0,1,0,1 1306 | 1304,1,0,1,1 1307 | 1305,0,1,0,0 1308 | 1306,0,1,0,0 1309 | 1307,0,1,1,0 1310 | 1308,1,0,0,1 1311 | 1309,0,1,0,1 1312 | 1310,0,0,0,1 1313 | 1311,1,0,1,1 1314 | 1312,0,1,0,0 1315 | 1313,1,0,1,1 1316 | 1314,0,1,0,0 1317 | 1315,0,0,0,0 
1318 | 1316,1,0,1,1 1319 | 1317,0,1,0,0 1320 | 1318,0,1,0,0 1321 | 1319,0,1,0,1 1322 | 1320,0,0,1,1 1323 | 1321,1,1,1,1 1324 | 1322,1,1,1,1 1325 | 1323,0,1,0,0 1326 | 1324,0,0,0,1 1327 | 1325,0,0,0,1 1328 | 1326,0,1,1,0 1329 | 1327,0,1,0,0 1330 | 1328,0,1,1,0 1331 | 1329,0,0,0,1 1332 | 1330,0,1,0,0 1333 | 1331,0,1,0,0 1334 | 1332,1,0,1,0 1335 | 1333,0,0,0,1 1336 | 1334,0,1,1,1 1337 | 1335,0,1,1,0 1338 | 1336,1,0,0,1 1339 | 1337,0,1,0,0 1340 | 1338,0,0,0,0 1341 | 1339,0,0,0,1 1342 | 1340,0,1,0,1 1343 | 1341,0,0,1,0 1344 | 1342,0,0,0,1 1345 | 1343,0,1,1,0 1346 | 1344,0,1,1,0 1347 | 1345,0,1,0,0 1348 | 1346,0,0,1,1 1349 | 1347,0,0,1,1 1350 | 1348,0,1,0,0 1351 | 1349,0,1,1,0 1352 | 1350,1,0,1,0 1353 | 1351,0,0,0,1 1354 | 1352,0,1,0,0 1355 | 1353,0,1,0,0 1356 | 1354,0,1,0,0 1357 | 1355,1,0,1,1 1358 | 1356,0,0,0,1 1359 | 1357,0,1,1,1 1360 | 1358,0,1,1,1 1361 | 1359,0,1,0,0 1362 | 1360,1,0,0,1 1363 | 1361,0,1,0,1 1364 | 1362,0,1,1,0 1365 | 1363,0,0,1,1 1366 | 1364,0,1,0,0 1367 | 1365,1,0,1,1 1368 | 1366,0,1,0,0 1369 | 1367,1,0,1,1 1370 | 1368,0,0,0,1 1371 | 1369,1,0,0,1 1372 | 1370,0,1,0,0 1373 | 1371,0,1,0,0 1374 | 1372,0,1,0,0 1375 | 1373,0,1,0,0 1376 | 1374,0,1,0,1 1377 | 1375,0,1,0,0 1378 | 1376,0,0,0,1 1379 | 1377,1,0,0,1 1380 | 1378,0,0,0,0 1381 | 1379,0,0,0,1 1382 | 1380,0,1,0,1 1383 | 1381,0,0,0,1 1384 | 1382,1,0,1,1 1385 | 1383,0,1,1,0 1386 | 1384,0,0,0,1 1387 | 1385,0,1,0,0 1388 | 1386,0,1,0,0 1389 | 1387,0,0,0,1 1390 | 1388,0,1,0,1 1391 | 1389,0,0,0,1 1392 | 1390,0,0,0,1 1393 | 1391,0,0,1,1 1394 | 1392,1,0,1,1 1395 | 1393,0,1,0,1 1396 | 1394,0,1,1,0 1397 | 1395,0,1,0,0 1398 | 1396,1,1,0,1 1399 | 1397,1,0,0,1 1400 | 1398,0,0,1,1 1401 | 1399,0,0,0,1 1402 | 1400,0,0,0,1 1403 | 1401,0,1,0,1 1404 | 1402,0,1,0,0 1405 | 1403,1,0,1,1 1406 | 1404,0,1,1,0 1407 | 1405,1,0,1,0 1408 | 1406,0,1,1,0 1409 | 1407,0,0,1,0 1410 | 1408,0,1,0,0 1411 | 1409,0,1,0,0 1412 | 1410,0,0,0,1 1413 | 1411,0,1,0,1 1414 | 1412,0,0,0,1 1415 | 1413,1,0,1,1 1416 | 1414,0,1,0,1 1417 | 1415,0,1,0,1 1418 | 1416,0,1,0,0 1419 | 1417,0,1,0,0 1420 | 1418,1,1,1,0 1421 | 1419,0,0,0,1 1422 | 1420,1,1,0,0 1423 | 1421,0,1,0,1 1424 | 1422,0,1,1,0 1425 | 1423,0,1,1,0 1426 | 1424,0,1,0,0 1427 | 1425,0,0,0,1 1428 | 1426,0,0,1,1 1429 | 1427,1,1,1,0 1430 | 1428,0,1,0,1 1431 | 1429,0,1,0,0 1432 | 1430,0,1,0,0 1433 | 1431,0,0,0,1 1434 | 1432,0,1,1,0 1435 | 1433,0,1,1,0 1436 | 1434,0,0,0,0 1437 | 1435,0,1,0,0 1438 | 1436,0,1,0,1 1439 | 1437,0,1,0,0 1440 | 1438,0,1,0,0 1441 | 1439,0,1,1,0 1442 | 1440,0,1,0,0 1443 | 1441,0,1,0,1 1444 | 1442,0,0,0,1 1445 | 1443,0,1,0,1 1446 | 1444,0,1,0,0 1447 | 1445,0,0,0,1 1448 | 1446,0,1,0,1 1449 | 1447,1,0,1,1 1450 | 1448,0,0,0,0 1451 | 1449,0,1,0,0 1452 | 1450,0,1,0,1 1453 | 1451,0,1,0,0 1454 | 1452,1,1,1,0 1455 | 1453,0,1,0,0 1456 | 1454,0,1,1,0 1457 | 1455,1,0,0,0 1458 | 1456,0,0,0,0 1459 | 1457,0,1,0,0 1460 | 1458,0,1,1,0 1461 | 1459,0,0,0,0 1462 | 1460,0,1,1,1 1463 | 1461,0,1,0,0 1464 | 1462,0,1,0,0 1465 | 1463,0,1,0,0 1466 | 1464,1,0,1,1 1467 | 1465,1,0,1,1 1468 | 1466,1,0,1,1 1469 | 1467,0,1,0,0 1470 | 1468,0,1,1,0 1471 | 1469,0,0,1,0 1472 | 1470,0,1,0,0 1473 | 1471,1,1,0,0 1474 | 1472,1,0,1,1 1475 | 1473,0,0,1,1 1476 | 1474,0,1,0,0 1477 | 1475,1,1,1,0 1478 | 1476,0,1,0,0 1479 | 1477,1,1,0,0 1480 | 1478,1,1,1,1 1481 | 1479,1,0,1,1 1482 | 1480,0,1,0,0 1483 | 1481,0,1,0,0 1484 | 1482,0,1,0,0 1485 | 1483,0,1,0,0 1486 | 1484,0,1,1,0 1487 | 1485,0,1,0,0 1488 | 1486,1,0,0,0 1489 | 1487,1,0,1,1 1490 | 1488,0,1,1,0 1491 | 1489,0,0,0,0 1492 | 1490,0,1,0,1 1493 | 1491,0,1,0,0 1494 | 1492,0,1,0,0 1495 | 
1493,0,1,1,1 1496 | 1494,0,1,0,0 1497 | 1495,0,1,1,1 1498 | 1496,0,1,0,0 1499 | 1497,0,0,1,0 1500 | 1498,1,0,0,1 1501 | 1499,0,1,0,1 1502 | 1500,0,1,0,0 1503 | 1501,0,1,0,0 1504 | 1502,0,1,0,0 1505 | 1503,0,1,0,1 1506 | 1504,0,1,0,0 1507 | 1505,0,1,0,0 1508 | 1506,1,0,0,1 1509 | 1507,0,1,1,0 1510 | 1508,0,0,0,1 1511 | 1509,0,1,0,1 1512 | 1510,0,1,1,0 1513 | 1511,0,1,1,0 1514 | 1512,1,0,1,1 1515 | 1513,0,1,0,0 1516 | 1514,0,1,0,0 1517 | 1515,0,0,0,1 1518 | 1516,0,1,0,0 1519 | 1517,1,0,1,1 1520 | 1518,0,1,0,0 1521 | 1519,1,0,1,1 1522 | 1520,1,0,0,1 1523 | 1521,0,1,0,0 1524 | 1522,0,1,1,1 1525 | 1523,1,1,1,0 1526 | 1524,0,1,1,0 1527 | 1525,0,1,1,1 1528 | 1526,0,1,1,0 1529 | 1527,1,0,1,1 1530 | 1528,0,1,0,1 1531 | 1529,0,1,0,1 1532 | 1530,0,1,0,0 1533 | 1531,0,0,1,1 1534 | 1532,0,0,0,0 1535 | 1533,1,0,1,1 1536 | 1534,0,1,0,0 1537 | 1535,0,1,0,0 1538 | 1536,0,1,1,1 1539 | 1537,0,1,0,0 1540 | 1538,0,1,0,0 1541 | 1539,0,1,1,0 1542 | 1540,0,1,0,0 1543 | 1541,0,1,1,1 1544 | 1542,1,0,1,1 1545 | 1543,0,1,0,0 1546 | 1544,1,0,1,1 1547 | 1545,0,1,0,0 1548 | 1546,1,0,1,1 1549 | 1547,1,0,0,1 1550 | 1548,1,1,1,0 1551 | 1549,0,1,1,0 1552 | 1550,0,1,1,1 1553 | 1551,1,0,1,1 1554 | 1552,1,0,0,1 1555 | 1553,0,1,0,0 1556 | 1554,0,1,0,0 1557 | 1555,0,1,0,0 1558 | 1556,0,1,0,0 1559 | 1557,1,0,1,1 1560 | 1558,1,0,1,1 1561 | 1559,0,1,0,1 1562 | 1560,1,0,0,1 1563 | 1561,0,1,0,0 1564 | 1562,0,1,0,1 1565 | 1563,0,1,1,0 1566 | 1564,0,1,0,0 1567 | 1565,1,0,0,1 1568 | 1566,1,0,1,1 1569 | 1567,1,0,1,1 1570 | 1568,0,1,0,0 1571 | 1569,0,1,0,0 1572 | 1570,0,1,0,0 1573 | 1571,0,1,1,0 1574 | 1572,0,1,1,0 1575 | 1573,0,0,0,0 1576 | 1574,1,0,1,1 1577 | 1575,0,1,0,0 1578 | 1576,1,0,1,1 1579 | 1577,0,1,1,0 1580 | 1578,0,0,0,1 1581 | 1579,0,1,0,0 1582 | 1580,1,1,1,1 1583 | 1581,0,1,0,0 1584 | 1582,0,1,0,0 1585 | 1583,0,0,0,1 1586 | 1584,0,1,0,1 1587 | 1585,0,1,1,1 1588 | 1586,1,0,1,1 1589 | 1587,0,0,0,1 1590 | 1588,0,1,0,0 1591 | 1589,1,0,1,0 1592 | 1590,0,0,0,1 1593 | 1591,0,1,1,1 1594 | 1592,0,1,0,0 1595 | 1593,0,1,1,0 1596 | 1594,1,0,0,1 1597 | 1595,0,0,1,1 1598 | 1596,0,1,1,1 1599 | 1597,0,1,0,1 1600 | 1598,1,0,1,1 1601 | 1599,1,0,1,1 1602 | 1600,0,1,0,0 1603 | 1601,0,1,1,1 1604 | 1602,0,0,1,0 1605 | 1603,0,0,0,1 1606 | 1604,0,1,0,0 1607 | 1605,1,1,1,0 1608 | 1606,1,1,1,0 1609 | 1607,1,0,1,1 1610 | 1608,0,1,1,0 1611 | 1609,1,0,1,1 1612 | 1610,0,1,0,0 1613 | 1611,1,0,1,1 1614 | 1612,0,0,0,0 1615 | 1613,0,0,1,1 1616 | 1614,0,0,0,0 1617 | 1615,1,0,1,0 1618 | 1616,1,0,0,1 1619 | 1617,0,1,0,0 1620 | 1618,0,0,0,1 1621 | 1619,1,0,1,1 1622 | 1620,0,0,0,1 1623 | 1621,0,1,1,1 1624 | 1622,0,1,0,1 1625 | 1623,0,1,0,0 1626 | 1624,1,0,0,0 1627 | 1625,1,1,1,1 1628 | 1626,0,1,1,0 1629 | 1627,0,0,1,0 1630 | 1628,1,1,1,1 1631 | 1629,0,1,0,0 1632 | 1630,0,1,0,0 1633 | 1631,0,1,1,0 1634 | 1632,0,1,0,1 1635 | 1633,0,1,0,0 1636 | 1634,0,1,0,0 1637 | 1635,0,1,0,0 1638 | 1636,1,0,1,1 1639 | 1637,0,1,1,1 1640 | 1638,0,1,1,0 1641 | 1639,1,0,1,1 1642 | 1640,1,1,1,0 1643 | 1641,0,0,0,1 1644 | 1642,0,1,0,0 1645 | 1643,0,1,0,0 1646 | 1644,0,1,0,0 1647 | 1645,0,1,0,0 1648 | 1646,1,1,1,1 1649 | 1647,0,1,0,1 1650 | 1648,1,1,0,0 1651 | 1649,0,1,0,0 1652 | 1650,0,1,0,1 1653 | 1651,0,0,0,0 1654 | 1652,1,0,1,1 1655 | 1653,0,1,0,1 1656 | 1654,0,1,0,0 1657 | 1655,0,0,0,0 1658 | 1656,0,1,0,0 1659 | 1657,0,1,0,0 1660 | 1658,1,0,1,1 1661 | 1659,1,1,1,1 1662 | 1660,0,0,1,0 1663 | 1661,0,0,1,1 1664 | 1662,0,1,0,0 1665 | 1663,1,0,1,0 1666 | 1664,0,1,0,0 1667 | 1665,0,1,1,0 1668 | 1666,0,1,0,0 1669 | 1667,0,1,0,0 1670 | 1668,0,1,0,0 1671 | 1669,0,1,1,0 1672 | 1670,0,0,0,1 
1673 | 1671,1,0,1,1 1674 | 1672,0,0,1,1 1675 | 1673,0,1,1,0 1676 | 1674,0,0,1,1 1677 | 1675,0,1,1,1 1678 | 1676,1,0,1,1 1679 | 1677,1,0,1,0 1680 | 1678,0,1,0,1 1681 | 1679,0,1,0,1 1682 | 1680,0,0,0,0 1683 | 1681,0,1,0,0 1684 | 1682,0,1,0,1 1685 | 1683,0,1,0,0 1686 | 1684,0,0,1,1 1687 | 1685,0,1,0,0 1688 | 1686,0,0,1,1 1689 | 1687,0,1,1,0 1690 | 1688,0,1,0,0 1691 | 1689,0,0,0,0 1692 | 1690,0,1,1,0 1693 | 1691,0,1,1,0 1694 | 1692,0,1,0,0 1695 | 1693,0,1,1,0 1696 | 1694,0,1,0,0 1697 | 1695,0,1,1,0 1698 | 1696,0,1,0,1 1699 | 1697,0,1,0,1 1700 | 1698,0,0,0,0 1701 | 1699,0,1,0,1 1702 | 1700,1,0,1,0 1703 | 1701,0,1,0,1 1704 | 1702,0,1,0,0 1705 | 1703,1,0,1,0 1706 | 1704,0,1,0,1 1707 | 1705,0,1,0,0 1708 | 1706,0,0,0,1 1709 | 1707,0,1,0,0 1710 | 1708,0,1,0,0 1711 | 1709,0,1,1,0 1712 | 1710,1,0,1,1 1713 | 1711,0,1,1,0 1714 | 1712,0,1,0,0 1715 | 1713,0,1,0,0 1716 | 1714,0,0,0,0 1717 | 1715,0,1,0,0 1718 | 1716,0,1,1,0 1719 | 1717,1,1,0,0 1720 | 1718,1,0,0,1 1721 | 1719,1,0,1,0 1722 | 1720,0,1,0,0 1723 | 1721,0,1,0,0 1724 | 1722,0,1,0,0 1725 | 1723,0,1,1,0 1726 | 1724,0,1,1,1 1727 | 1725,0,0,0,0 1728 | 1726,0,1,1,0 1729 | 1727,1,1,1,0 1730 | 1728,1,0,1,1 1731 | 1729,0,1,0,0 1732 | 1730,0,1,0,0 1733 | 1731,1,1,1,0 1734 | 1732,0,1,1,0 1735 | 1733,0,1,0,0 1736 | 1734,1,0,1,1 1737 | 1735,0,1,0,0 1738 | 1736,0,0,0,0 1739 | 1737,0,0,1,0 1740 | 1738,1,0,1,1 1741 | 1739,0,1,0,0 1742 | 1740,1,0,1,1 1743 | 1741,0,0,0,0 1744 | 1742,0,0,0,1 1745 | 1743,0,1,1,0 1746 | 1744,0,1,0,0 1747 | 1745,0,1,1,0 1748 | 1746,1,0,1,1 1749 | 1747,0,1,0,0 1750 | 1748,1,1,1,0 1751 | 1749,0,1,1,0 1752 | 1750,0,1,0,0 1753 | 1751,0,0,0,0 1754 | 1752,0,0,0,1 1755 | 1753,1,1,1,0 1756 | 1754,0,1,1,1 1757 | 1755,0,1,0,0 1758 | 1756,0,1,1,1 1759 | 1757,0,1,1,0 1760 | 1758,0,1,1,0 1761 | 1759,0,1,0,1 1762 | 1760,1,0,0,1 1763 | 1761,0,1,0,1 1764 | 1762,0,1,1,1 1765 | 1763,1,1,1,0 1766 | 1764,0,1,0,0 1767 | 1765,0,1,0,0 1768 | 1766,1,0,1,1 1769 | 1767,0,1,0,0 1770 | 1768,0,1,0,0 1771 | 1769,0,1,0,0 1772 | 1770,0,1,0,0 1773 | 1771,0,1,0,0 1774 | 1772,0,1,0,0 1775 | 1773,0,1,0,0 1776 | 1774,1,0,1,1 1777 | 1775,0,0,0,1 1778 | 1776,0,1,0,0 1779 | 1777,0,1,0,1 1780 | 1778,0,1,0,1 1781 | 1779,0,1,0,1 1782 | 1780,0,1,0,0 1783 | 1781,0,1,1,0 1784 | 1782,1,1,1,0 1785 | 1783,1,0,1,0 1786 | 1784,1,1,1,0 1787 | 1785,0,1,1,0 1788 | 1786,0,1,0,0 1789 | 1787,0,1,0,0 1790 | 1788,0,1,0,0 1791 | 1789,0,0,0,1 1792 | 1790,0,0,1,0 1793 | 1791,0,1,0,0 1794 | 1792,0,1,1,0 1795 | 1793,0,0,0,0 1796 | 1794,1,0,1,1 1797 | 1795,0,1,0,1 1798 | 1796,1,0,1,0 1799 | 1797,0,1,0,1 1800 | 1798,0,1,0,0 1801 | 1799,1,0,0,1 1802 | 1800,0,1,0,0 1803 | 1801,1,1,1,1 1804 | 1802,1,0,1,1 1805 | 1803,0,1,0,0 1806 | 1804,0,1,0,1 1807 | 1805,0,1,1,0 1808 | 1806,1,1,1,0 1809 | 1807,1,0,1,1 1810 | 1808,0,1,0,0 1811 | 1809,0,1,1,0 1812 | 1810,1,0,1,1 1813 | 1811,0,1,0,0 1814 | 1812,0,1,0,0 1815 | 1813,0,1,0,0 1816 | 1814,1,0,0,0 1817 | 1815,0,1,0,0 1818 | 1816,0,0,0,0 1819 | 1817,1,0,1,0 1820 | 1818,1,0,0,1 1821 | 1819,0,1,0,0 1822 | 1820,1,0,1,1 1823 | 1821,0,1,0,0 1824 | 1822,0,0,0,1 1825 | 1823,0,1,0,0 1826 | 1824,1,0,0,0 1827 | 1825,1,0,0,1 1828 | 1826,1,0,1,1 1829 | 1827,0,0,1,1 1830 | 1828,0,0,0,1 1831 | 1829,0,1,0,0 1832 | 1830,1,0,1,1 1833 | 1831,1,0,0,1 1834 | 1832,1,1,0,0 1835 | 1833,0,1,0,0 1836 | 1834,0,1,0,1 1837 | 1835,0,1,0,0 1838 | 1836,0,1,0,0 1839 | 1837,0,1,0,0 1840 | 1838,1,0,1,0 1841 | 1839,0,0,0,0 1842 | 1840,0,1,0,0 1843 | 1841,0,1,0,0 1844 | 1842,0,1,1,0 1845 | 1843,0,0,1,1 1846 | 1844,0,1,1,0 1847 | 1845,0,0,0,1 1848 | 1846,0,1,0,0 1849 | 1847,0,1,0,1 1850 | 
1848,1,0,1,1 1851 | 1849,0,1,0,0 1852 | 1850,0,1,0,1 1853 | 1851,1,1,1,0 1854 | 1852,0,1,0,1 1855 | 1853,1,0,1,1 1856 | 1854,1,1,0,1 1857 | 1855,0,1,0,0 1858 | 1856,0,1,1,1 1859 | 1857,0,1,0,0 1860 | 1858,0,1,0,0 1861 | 1859,1,0,1,1 1862 | 1860,0,1,1,0 1863 | 1861,0,1,0,0 1864 | 1862,0,1,1,0 1865 | 1863,0,1,0,0 1866 | 1864,0,1,0,0 1867 | 1865,0,1,0,0 1868 | 1866,0,1,0,0 1869 | 1867,0,0,0,1 1870 | 1868,0,0,0,1 1871 | 1869,0,0,1,1 1872 | 1870,1,0,1,1 1873 | 1871,0,1,0,0 1874 | 1872,0,1,0,0 1875 | 1873,0,1,1,0 1876 | 1874,1,0,1,1 1877 | 1875,0,1,1,0 1878 | 1876,0,0,0,1 1879 | 1877,0,1,0,0 1880 | 1878,0,1,0,0 1881 | 1879,0,1,0,0 1882 | 1880,0,1,1,0 1883 | 1881,1,0,1,0 1884 | 1882,0,1,1,0 1885 | 1883,0,1,0,1 1886 | 1884,0,1,0,0 1887 | 1885,0,1,0,0 1888 | 1886,0,1,1,1 1889 | 1887,0,0,0,1 1890 | 1888,1,0,1,1 1891 | 1889,1,0,0,1 1892 | 1890,0,0,0,1 1893 | 1891,0,1,1,0 1894 | 1892,0,1,0,0 1895 | 1893,0,1,0,0 1896 | 1894,0,1,1,0 1897 | 1895,0,1,0,0 1898 | 1896,0,1,1,0 1899 | 1897,0,1,1,0 1900 | 1898,0,1,0,1 1901 | 1899,0,1,0,0 1902 | 1900,0,1,1,0 1903 | 1901,0,1,0,0 1904 | 1902,1,0,1,1 1905 | 1903,1,0,1,1 1906 | 1904,0,1,0,0 1907 | 1905,0,1,1,0 1908 | 1906,0,1,1,0 1909 | 1907,0,1,1,0 1910 | 1908,0,0,0,0 1911 | 1909,0,1,1,0 1912 | 1910,0,1,1,1 1913 | 1911,1,0,1,0 1914 | 1912,0,0,1,0 1915 | 1913,0,0,0,1 1916 | 1914,0,1,0,0 1917 | 1915,0,0,1,1 1918 | 1916,0,1,0,0 1919 | 1917,1,1,1,0 1920 | 1918,0,0,0,0 1921 | 1919,0,1,0,0 1922 | 1920,1,1,1,0 1923 | 1921,1,0,0,1 1924 | 1922,0,1,0,0 1925 | 1923,0,0,0,1 1926 | 1924,0,1,0,0 1927 | 1925,0,1,1,0 1928 | 1926,0,0,0,1 1929 | 1927,0,1,1,0 1930 | 1928,1,0,1,0 1931 | 1929,0,1,0,1 1932 | 1930,0,0,0,1 1933 | 1931,1,0,1,1 1934 | 1932,0,1,0,0 1935 | 1933,0,1,0,0 1936 | 1934,0,0,1,0 1937 | 1935,1,0,0,1 1938 | 1936,0,1,0,0 1939 | 1937,1,0,1,1 1940 | 1938,0,0,0,1 1941 | 1939,0,1,0,1 1942 | 1940,1,0,1,1 1943 | 1941,0,1,0,0 1944 | 1942,0,1,0,0 1945 | 1943,0,1,0,0 1946 | 1944,0,1,0,0 1947 | 1945,0,1,0,0 1948 | 1946,0,0,0,1 1949 | 1947,0,1,0,1 1950 | 1948,0,1,1,0 1951 | 1949,0,1,0,0 1952 | 1950,0,0,0,1 1953 | 1951,0,1,0,0 1954 | 1952,1,1,0,0 1955 | 1953,0,1,0,0 1956 | 1954,1,0,1,1 1957 | 1955,0,0,0,0 1958 | 1956,0,1,1,0 1959 | 1957,1,0,1,1 1960 | 1958,0,1,0,0 1961 | 1959,0,1,1,0 1962 | 1960,0,1,0,0 1963 | 1961,1,0,1,1 1964 | 1962,0,1,1,0 1965 | 1963,1,1,0,0 1966 | 1964,0,0,1,1 1967 | 1965,0,1,0,0 1968 | 1966,0,1,0,0 1969 | 1967,0,1,0,0 1970 | 1968,0,1,1,0 1971 | 1969,0,0,1,0 1972 | 1970,0,0,0,1 1973 | 1971,0,1,0,0 1974 | 1972,0,1,1,1 1975 | 1973,0,1,0,0 1976 | 1974,0,0,0,1 1977 | 1975,1,0,0,1 1978 | 1976,1,1,1,0 1979 | 1977,0,1,0,0 1980 | 1978,1,1,1,0 1981 | 1979,0,1,1,0 1982 | 1980,0,0,0,0 1983 | 1981,0,1,1,0 1984 | 1982,0,0,0,1 1985 | 1983,0,1,1,0 1986 | 1984,0,1,0,0 1987 | 1985,1,1,1,1 1988 | 1986,1,0,1,1 1989 | 1987,0,1,1,0 1990 | 1988,0,1,0,1 1991 | 1989,0,1,0,1 1992 | 1990,0,1,0,0 1993 | 1991,0,1,1,0 1994 | 1992,1,0,1,1 1995 | 1993,0,1,1,1 1996 | 1994,1,0,1,1 1997 | 1995,1,0,1,0 1998 | 1996,0,0,0,1 1999 | 1997,0,1,0,0 2000 | 1998,0,1,0,0 2001 | 1999,0,1,0,1 2002 | -------------------------------------------------------------------------------- /tests/unit/nonparametric.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from scipy.integrate import nquad 3 | import numpy as np 4 | 5 | from causality.estimation.nonparametric import CausalEffect 6 | from tests.unit import TestAPI 7 | from tests.unit.settings import TOL 8 | 9 | 10 | class TestCausalEffect(TestAPI): 11 | def setUp(self): 12 | self.X = 
pd.read_csv('./tests/unit/data/X.csv') 13 | self.discrete = pd.read_csv('./tests/unit/data/discrete.csv') 14 | 15 | def test_pdf_discrete(self): 16 | causes = ['c'] 17 | effects = ['d'] 18 | admissable_set = ['a'] 19 | variable_types={'a': 'u','b': 'u','c': 'u','d' : 'u'} 20 | effect = CausalEffect(self.discrete,causes,effects,admissable_set,variable_types) 21 | p = effect.pdf(pd.DataFrame({ 'd' : [1], 'c' : [0]})) 22 | print(p) 23 | # p(d=1|do(c=0)) = 0.45, p(d=1|c=0) = 0.40 24 | assert( abs( 0.45 - p ) < 0.02 ) 25 | 26 | def test_pdf_no_adjustment(self): 27 | causes = ['c'] 28 | effects = ['d'] 29 | admissable_set = [] 30 | variable_types={'a': 'u','b': 'u','c': 'u','d' : 'u'} 31 | effect = CausalEffect(self.discrete,causes,effects,admissable_set,variable_types) 32 | # p(d=1|do(c=0)) = 0.45, p(d=1|c=0) = 0.40 33 | p = effect.pdf(pd.DataFrame({ 'd' : [1], 'c' : [0]})) 34 | print(p) 35 | assert( abs( 0.40 - p ) < 0.02 ) 36 | 37 | def test_pdf_continuous(self): 38 | causes = ['c'] 39 | effects = ['d'] 40 | admissable_set = ['a'] 41 | variable_types={'a': 'c','b': 'c','c': 'c','d' : 'c'} 42 | effect = CausalEffect(self.X,causes,effects,admissable_set,variable_types) 43 | c = np.mean(effect.support['c']) 44 | d = np.mean(effect.support['d']) 45 | e1 = effect.pdf(pd.DataFrame({ 'd' : [d], 'c' : [ 0.9 * c]})) 46 | e2 = effect.pdf(pd.DataFrame({ 'd' : [d], 'c' : [ 1.1 * c]})) 47 | print(e2, e1, e2 - e1, (e2 - e1) / e2) 48 | assert( abs(e2 - e1) / e2 < 0.05 ) 49 | 50 | 51 | def test_pdf_mixed(self): 52 | pass 53 | 54 | 55 | def test_densities(self): 56 | causes = ['c'] 57 | effects = ['d'] 58 | admissable_set = ['a'] 59 | variable_types={'a': 'c','b': 'c','c': 'c','d' : 'c'} 60 | effect = CausalEffect(self.X,causes,effects,admissable_set,variable_types) 61 | density = lambda x: effect.density.pdf( data_predict=[x]) 62 | integral = nquad( density, [effect.support[d_var] for d_var in admissable_set])[0] 63 | print(integral) 64 | assert(abs(integral - 1.) < TOL) 65 | 66 | x_vals = [np.mean(effect.support[var]) for var in causes] 67 | z_vals = [np.mean(effect.support[var]) for var in admissable_set] 68 | density = lambda x: effect.conditional_density.pdf(endog_predict=[x], exog_predict=x_vals + z_vals) 69 | integral = nquad(density, [effect.support[d_var] for d_var in effects])[0] 70 | print(x_vals, z_vals, integral) 71 | assert(abs(integral - 1.) 
< TOL) 72 | 73 | 74 | def test_get_support(self): 75 | data_ranges = {} 76 | for variable in self.X.columns: 77 | data_ranges[variable] = ( self.X[variable].min(), self.X[variable].max()) 78 | causes = ['c'] 79 | effects = ['d'] 80 | admissable_set = ['a'] 81 | variable_types={'a': 'c','b': 'c','c': 'c','d' : 'c'} 82 | effect = CausalEffect(self.X,causes,effects,admissable_set,variable_types) 83 | for variable, (supp_min, supp_max) in effect.support.items(): 84 | (xmin, xmax) = data_ranges[variable] 85 | assert(supp_min <= xmin) 86 | assert(supp_max >= xmax) 87 | 88 | 89 | def test_integration_function(self): 90 | causes = ['c'] 91 | effects = ['d'] 92 | admissable_set = ['a'] 93 | variable_types={'a': 'c','b': 'c','c': 'c','d' : 'c'} 94 | effect = CausalEffect(self.X,causes,effects,admissable_set,variable_types) 95 | 96 | 97 | 98 | def test_expectation_discrete(self): 99 | causes = ['c'] 100 | effects = ['d'] 101 | admissable_set = ['a'] 102 | variable_types={'a': 'u','b': 'u','c': 'u','d' : 'u'} 103 | effect = CausalEffect(self.discrete, 104 | causes, 105 | effects, 106 | admissable_set, 107 | variable_types, 108 | density=False, 109 | expectation=True) 110 | 111 | x = pd.DataFrame({ 'c' : [0]}) 112 | p = effect.expected_value(x) 113 | print("p(d=1 | do(c = 0) ): ", p) 114 | assert( abs( 0.40 - p ) < 0.05 ) 115 | 116 | x = pd.DataFrame({ 'c' : [1]}) 117 | p = effect.expected_value(x) 118 | print("p(d=1 | do(c = 1) ): ", p) 119 | assert( abs( 0.40 - p ) < 0.05 ) 120 | 121 | 122 | causes = ['b'] 123 | effects = ['d'] 124 | admissable_set = ['a'] 125 | variable_types={'a': 'u','b': 'u','c': 'u','d' : 'u'} 126 | effect = CausalEffect(self.discrete, 127 | causes, 128 | effects, 129 | admissable_set, 130 | variable_types, 131 | density=False, 132 | expectation=True) 133 | 134 | x = pd.DataFrame({ 'b' : [0]}) 135 | p = effect.expected_value(x) 136 | print("p(d=1 | do(b = 0) ): ", p) 137 | assert( abs( p - 0.75 ) < 0.05 ) 138 | 139 | x = pd.DataFrame({ 'b' : [1]}) 140 | p = effect.expected_value(x) 141 | print("p(d=1 | do(b = 1) ): ", p) 142 | assert( abs( p - 0.25 ) < 0.05 ) 143 | 144 | 145 | def test_expectation_continuous(self): 146 | causes = ['c'] 147 | effects = ['d'] 148 | admissable_set = ['a'] 149 | variable_types={'a': 'c','b': 'c','c': 'c','d' : 'c'} 150 | effect = CausalEffect(self.X, 151 | causes, 152 | effects, 153 | admissable_set, 154 | variable_types, 155 | density=False, 156 | expectation=True) 157 | 158 | x = pd.DataFrame({ 'c' : [400]}) 159 | p1 = effect.expected_value(x) 160 | print("E(d | do(c = 400) ): ", p1) 161 | 162 | x = pd.DataFrame({ 'c' : [600]}) 163 | p2 = effect.expected_value(x) 164 | print("E(d | do(c = 600) ): ", p2) 165 | assert( abs( p2 - p1 ) / 200 < 0.5 ) 166 | 167 | 168 | causes = ['b'] 169 | effects = ['d'] 170 | admissable_set = ['a'] 171 | variable_types={'a': 'c','b': 'c','c': 'c','d' : 'c'} 172 | effect = CausalEffect(self.X, 173 | causes, 174 | effects, 175 | admissable_set, 176 | variable_types, 177 | density=False, 178 | expectation=True) 179 | 180 | x = pd.DataFrame({ 'b' : [400]}) 181 | p1 = effect.expected_value(x) 182 | print("E(d | do(b = 400) ): ", p1) 183 | 184 | x = pd.DataFrame({ 'b' : [600]}) 185 | p2 = effect.expected_value(x) 186 | print("E(d | do(b = 600) ): ", p2) 187 | #assert( abs( p - 0.25 ) < 0.05 ) 188 | assert( ( abs( ( p2 - p1 ) / 200 - 5. ) 
< 0.01 ) ) 189 | -------------------------------------------------------------------------------- /tests/unit/parametric.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from causality.estimation.parametric import (DifferenceInDifferences, 5 | PropensityScoreMatching, 6 | InverseProbabilityWeightedLS) 7 | from tests.unit import TestAPI 8 | 9 | 10 | class TestDID(TestAPI): 11 | def setUp(self): 12 | SIZE = 2000 13 | assignment = np.random.binomial(1,0.5, size=SIZE) 14 | pre_experiment = assignment + np.random.normal(-1, size=SIZE) 15 | start = assignment + np.random.normal(1, size=SIZE) 16 | end = start + np.random.normal(2.*assignment) + np.random.normal(2, size=SIZE) 17 | self.X_pre = pd.DataFrame({'Start' : pre_experiment, 'End' : start, 'assignment' : assignment}) 18 | self.X = pd.DataFrame({'Start' : start, 'End' : end, 'assignment' : assignment}) 19 | self.did = DifferenceInDifferences() 20 | 21 | def test_assumption_tester(self): 22 | assert self.did.test_parallel_trend(self.X_pre) 23 | 24 | self.X_pre['End'] += self.X_pre['assignment'] 25 | assert not self.did.test_parallel_trend(self.X_pre) 26 | 27 | def test_did_estimator(self): 28 | lower, exp, upper = self.did.average_treatment_effect(self.X) 29 | assert 1.8 <= exp <= 2.2 30 | assert lower <= exp <= upper 31 | 32 | self.did = DifferenceInDifferences(robust=True) 33 | lower, exp, upper = self.did.average_treatment_effect(self.X) 34 | assert 1.8 <= exp <= 2.2 35 | assert lower <= exp <= upper 36 | 37 | 38 | class TestPropScore(TestAPI): 39 | def test_match(self): 40 | matcher = PropensityScoreMatching() 41 | X = pd.DataFrame({'assignment': [1, 0, 0, 0, 0, 0], 42 | 'propensity score': [3, 1, 2, 3, 5, 4]}) 43 | 44 | test, control = matcher.match(X, n_neighbors=3) 45 | assert set(control['propensity score'].values) == set([2, 3, 4]) 46 | 47 | def test_score(self): 48 | N = 5000 49 | z1 = np.random.normal(size=N) 50 | z2 = np.random.choice(['a','b','c'], size=N) 51 | numeric_mapping = {'a' :3, 'b' :4, 'c' :5} 52 | z2_numeric = [numeric_mapping[z2i] for z2i in z2] 53 | p_assign = np.exp(z1 + z2_numeric) / (1. + np.exp(z1 + z2_numeric)) 54 | assignment = np.random.binomial(1, p_assign) 55 | outcome = np.random.normal(assignment) 56 | matcher = PropensityScoreMatching() 57 | X = pd.DataFrame({'z1': z1, 'z2': z2, 'assignment': assignment, 'outcome': outcome}) 58 | confounder_types = {'z1': 'c', 'z2':'o'} 59 | matcher.score(X, confounder_types, store_model_fit=True) 60 | assert 0.7 <= matcher.propensity_score_model.params['z1'] <= 1.3 61 | assert 0.0 <= matcher.propensity_score_model.params['z2_b'] <= 2.0 62 | assert 1.0 <= matcher.propensity_score_model.params['z2_c'] <= 3.0 63 | assert 2.0 <= matcher.propensity_score_model.params['intercept'] <= 4.0 64 | 65 | def test_at_estimators(self): 66 | N = 1000 # how many data points 67 | 68 | z1 = 0.5 * np.random.normal(size=N) # a few confounding variables 69 | z2 = 0.5 * np.random.normal(size=N) 70 | z3 = 0.5 * np.random.normal(size=N) 71 | 72 | arg = (z1 + z2 + z3 + np.random.normal(size=N)) 73 | p = np.exp(arg) / (1. + np.exp(arg)) # propensity to receive treatment, P(d|z), taking on a logistic form 74 | d = np.random.binomial(1, p) 75 | 76 | y = (np.random.normal(size=N) + (z1 + z2 + z3 + 1.) * d) # effect of d is confounded by z. True ATE is 1. 
77 | 78 | X = pd.DataFrame({'d': d, 'z1': z1, 'z2': z2, 'z3': z3, 'y': y, 'p': p}) 79 | 80 | matcher = PropensityScoreMatching() 81 | ATE = matcher.estimate_ATE(X, 'd', 'y', {'z1': 'c', 'z2': 'c', 'z3': 'c'}) 82 | assert 0.9 <= ATE <= 1.1 83 | 84 | class TestIPW(TestAPI): 85 | def test_estimators(self): 86 | N = 2000 87 | z = np.random.normal(size=N) 88 | d = np.random.binomial(1, p=1. / (1. + np.exp(-z))) 89 | y0 = np.random.normal(size=N) 90 | y1 = y0 + 2. * (1 + z) 91 | y = (d == 1) * y1 + (d == 0) * y0 92 | X = pd.DataFrame({'d': d, 'z': z, 'y': y, 'y0': y0, 'y1': y1}) 93 | 94 | assignment = 'd' 95 | confounder_types = {'z': 'c'} 96 | outcome = 'y' 97 | ipw_model = InverseProbabilityWeightedLS() 98 | atc_lower, atc_exp, atc_upper = ipw_model.estimate_ATC(X, 99 | assignment, 100 | outcome, 101 | confounder_types, 102 | propensity_score_name='propensity score') 103 | assert 0.9 * atc_lower <= (X[X['d'] == 0]['y1'] - X[X['d'] == 0]['y0']).mean() <= 1.1 * atc_upper 104 | 105 | att_lower, att_exp, att_upper = ipw_model.estimate_ATT(X, 106 | assignment, 107 | outcome, 108 | confounder_types, 109 | propensity_score_name='propensity score') 110 | assert 0.9 * att_lower <= (X[X['d'] == 1]['y1'] - X[X['d'] == 1]['y0']).mean() <= 1.1 * att_upper 111 | 112 | 113 | ate_lower, ate_exp, ate_upper = ipw_model.estimate_ATE(X, 114 | assignment, 115 | outcome, 116 | confounder_types, 117 | propensity_score_name='propensity score') 118 | assert 0.9 * ate_lower <= (X['y1'] - X['y0']).mean() <= 1.1 * ate_upper 119 | -------------------------------------------------------------------------------- /tests/unit/settings.py: -------------------------------------------------------------------------------- 1 | TOL = 0.01 # tolerance on density normalization 2 | -------------------------------------------------------------------------------- /tests/unit/test_IC.py: -------------------------------------------------------------------------------- 1 | from tests.unit import TestAPI 2 | import itertools 3 | 4 | import numpy.random 5 | import pandas as pd 6 | import networkx as nx 7 | 8 | from causality.inference.search import IC 9 | from causality.inference.independence_tests import RobustRegressionTest 10 | 11 | TEST_SET_SIZE = 2000 12 | 13 | class Test_IC(TestAPI): 14 | 15 | def setUp(self): 16 | x1 = numpy.random.normal(size=TEST_SET_SIZE) 17 | x2 = x1 + numpy.random.normal(size=TEST_SET_SIZE) 18 | x3 = x1 + numpy.random.normal(size=TEST_SET_SIZE) 19 | x4 = x2 + x3 + numpy.random.normal(size=TEST_SET_SIZE) 20 | x5 = x4 + numpy.random.normal(size=TEST_SET_SIZE) 21 | self.X = pd.DataFrame({'x1' : x1, 'x2' : x2, 'x3' : x3, 'x4' : x4, 'x5' : x5}) 22 | self.variable_types = {'x1' : 'c', 'x2' : 'c', 'x3' : 'c', 'x4' : 'c', 'x5' : 'c'} 23 | self.true_neighbors = { 'x1' : set(['x2','x3']), 24 | 'x2' : set(['x1','x4']), 25 | 'x3' : set(['x1','x4']), 26 | 'x4' : set(['x2','x3','x5']), 27 | 'x5' : set(['x4'])} 28 | self.true_colliders = set([('x3','x4'), ('x2','x4')]) 29 | self.true_marked = set([('x4','x5')]) 30 | self.ic = IC(RobustRegressionTest) 31 | self.ic.search(self.X, self.variable_types) 32 | 33 | def test_build_g(self): 34 | self.ic._build_g(self.variable_types) 35 | V = len(self.X.columns) 36 | assert(len(self.ic._g.edges()) == (V-1)*V / 2) 37 | assert(set(self.ic._g.nodes()) == set(self.variable_types.keys())) 38 | for node, variable_type in self.variable_types.items(): 39 | assert(self.ic._g.nodes[node]['type'] == variable_type) 40 | for i, j in self.ic._g.edges(): 41 | assert(self.ic._g.get_edge_data(i, 
j)['marked'] == False) 42 | 43 | def test_find_skeleton(self): 44 | self.ic._build_g(self.variable_types) 45 | self.ic._find_skeleton(self.X, self.variable_types) 46 | for node, neighbors in self.true_neighbors.items(): 47 | assert(set(self.ic._g.neighbors(node)) == neighbors) 48 | 49 | def test_orient_colliders(self): 50 | self.ic._build_g(self.variable_types) 51 | self.ic._find_skeleton(self.X, self.variable_types) 52 | self.ic._orient_colliders() 53 | for i, j in self.ic._g.edges(): 54 | measured_colliders = self.ic._g.get_edge_data(i, j)['arrows'] 55 | if len(measured_colliders) > 0: 56 | if j in measured_colliders: 57 | assert((i,j) in self.true_colliders) 58 | else: 59 | assert((j,i) in self.true_colliders) 60 | else: 61 | assert((i,j) not in self.true_colliders and (j,i) not in self.true_colliders) 62 | 63 | def test_separating_set(self): 64 | self.ic._build_g(self.variable_types) 65 | self.ic._find_skeleton(self.X, self.variable_types) 66 | for xi, xj in itertools.combinations(self.variable_types.keys(), 2): 67 | if not self.ic._g.has_edge(xi,xj): 68 | if (xi,xj) in self.ic.separating_sets: 69 | z = self.ic.separating_sets[(xi,xj)] 70 | else: 71 | z = self.ic.separating_sets[(xj,xi)] 72 | test = self.ic.independence_test([xj],[xi], list(z), self.X, self.ic.alpha) 73 | assert(test.independent()) 74 | 75 | def test_marked_directed_path(self): 76 | marked_edges = [('a','b'),('b','c'),('c','d')] 77 | unmarked_edges = [('a','d')] 78 | nodes = ['a','b','c','d'] 79 | g = nx.Graph() 80 | g.add_edges_from(marked_edges, marked=True) 81 | g.add_edges_from(unmarked_edges, marked=False) 82 | for i, j in (marked_edges + unmarked_edges): 83 | g.get_edge_data(i, j)['arrows'] = [j] 84 | self.ic._g = g 85 | assert(self.ic._marked_directed_path('a','d')) 86 | assert(not self.ic._marked_directed_path('d','a')) 87 | 88 | def test_recursion_rule_1(self): 89 | pass 90 | 91 | def test_recursion_rule_2(self): 92 | pass 93 | 94 | def test_search(self): 95 | self.ic.search(self.X, self.variable_types) 96 | for i, j in self.ic._g.edges(): 97 | if self.ic._g.get_edge_data(i, j)['marked']: 98 | assert( (i,j) in self.true_marked or (j,i) in self.true_marked) 99 | else: 100 | assert( (i,j) not in self.true_marked and (j,i) not in self.true_marked) 101 | 102 | -------------------------------------------------------------------------------- /tests/unit/test_cit.py: -------------------------------------------------------------------------------- 1 | from tests.unit import TestAPI 2 | import itertools 3 | 4 | import numpy.random 5 | import pandas as pd 6 | import networkx as nx 7 | 8 | from causality.inference.independence_tests import (RobustRegressionTest, 9 | ChiSquaredTest, 10 | MutualInformationTest) 11 | 12 | TEST_SET_SIZE = 2000 13 | TRIALS = 2 14 | P = 0.5 15 | 16 | class TestChi2(TestAPI): 17 | 18 | def setUp(self): 19 | a = numpy.random.binomial(TRIALS,P,size=TEST_SET_SIZE) 20 | b = (numpy.random.binomial(TRIALS,P,size=TEST_SET_SIZE) + a) % 3 21 | c = numpy.random.binomial(TRIALS,P,size=TEST_SET_SIZE) 22 | d = numpy.random.binomial(TRIALS,P,size=TEST_SET_SIZE) 23 | self.X = pd.DataFrame({'a' : a, 24 | 'b' : b, 25 | 'c' : c, 26 | 'd' : d }) 27 | self.alpha = 0.05 28 | 29 | def test_chi2(self): 30 | x = ['a'] 31 | y = ['b'] 32 | z = [] 33 | print(f"${x} \\not\\perp {y} | {z}$") 34 | test = ChiSquaredTest(y,x,z,self.X,self.alpha) 35 | assert(not test.independent()) 36 | 37 | x = ['a'] 38 | y = ['b'] 39 | z = ['c','d'] 40 | print(f"${x} \\not\\perp {y} | {z}$") 41 | test = 
ChiSquaredTest(y,x,z,self.X,self.alpha) 42 | assert(not test.independent()) 43 | 44 | x = ['a'] 45 | y = ['c'] 46 | z = [] 47 | print(f"${x} \\perp {y} | {z}$") 48 | test = ChiSquaredTest(y,x,z,self.X,self.alpha) 49 | assert(test.independent()) 50 | 51 | x = ['a'] 52 | y = ['c'] 53 | z = ['b'] 54 | print(f"${x} \\perp {y} | {z}$") 55 | test = ChiSquaredTest(y,x,z,self.X,self.alpha) 56 | assert(test.independent()) 57 | 58 | x = ['a','b'] 59 | y = ['c'] 60 | z = ['d'] 61 | print(f"${x} \\perp {y} | {z}$") 62 | test = ChiSquaredTest(y,x,z,self.X,self.alpha) 63 | assert(test.independent()) 64 | 65 | x = ['a'] 66 | y = ['b','c'] 67 | z = ['d'] 68 | print(f"${x} \\not\\perp {y} | {z}$") 69 | test = ChiSquaredTest(y,x,z,self.X,self.alpha) 70 | assert(not test.independent()) 71 | 72 | 73 | class TestMutualInformation(TestAPI): 74 | def setUp(self): 75 | size = 1000 76 | x1 = numpy.random.choice(range(5), size=size) 77 | x2 = [round(0.7*numpy.random.rand() * xi) for xi in x1] 78 | x3 = [round(0.7*numpy.random.rand() * xi) for xi in x2] 79 | self.X = pd.DataFrame({'x1':x1,'x2':x2, 'x3':x3}) 80 | self.alpha = 0.05 81 | self.variable_types = {'x1':'d', 'x2':'d', 'x3':'d'} 82 | 83 | def test_mi(self): 84 | y = ['x3'] 85 | x = ['x1'] 86 | z = ['x2'] 87 | test = MutualInformationTest(y, x, z, self.X, self.alpha, variable_types=self.variable_types) 88 | assert(test.independent()) 89 | 90 | y = ['x3'] 91 | x = ['x1'] 92 | z = [] 93 | test = MutualInformationTest(y, x, z, self.X, self.alpha, variable_types=self.variable_types) 94 | assert(not test.independent()) 95 | 96 | y = ['x1'] 97 | x = ['x1'] 98 | z = [] 99 | test = MutualInformationTest(y, x, z, self.X, self.alpha, variable_types=self.variable_types) 100 | assert(not test.independent()) 101 | 102 | I, dI = test.max_likelihood_information(x, y, self.X) 103 | z = 1.96 104 | assert((numpy.exp(I-z*dI) < 5) and (5 < numpy.exp(I+z*dI))) 105 | --------------------------------------------------------------------------------