├── .gitignore
├── CTL
│   ├── __init__.py
│   ├── _tree.py
│   ├── causal_learn_forest.py
│   ├── causal_tree
│   │   ├── __init__.py
│   │   ├── ct.py
│   │   ├── ctl
│   │   │   ├── __init__.py
│   │   │   ├── adaptive.py
│   │   │   ├── binary_ctl.py
│   │   │   ├── ctl_base.py
│   │   │   ├── ctl_honest.py
│   │   │   ├── ctl_val_honest.py
│   │   │   └── honest.py
│   │   ├── ctl_match
│   │   │   ├── __init__.py
│   │   │   ├── binary_ctl.py
│   │   │   └── ctl_base.py
│   │   ├── ctl_trigger
│   │   │   ├── __init__.py
│   │   │   ├── adaptive_trigger.py
│   │   │   ├── ctl_base_trigger.py
│   │   │   ├── ctl_honest_trigger.py
│   │   │   ├── ctl_val_honest_trigger.py
│   │   │   ├── honest_trigger.py
│   │   │   └── trigger_ctl.py
│   │   ├── nn_pehe
│   │   │   ├── __init__.py
│   │   │   ├── balance_split.py
│   │   │   ├── base.py
│   │   │   ├── honest.py
│   │   │   ├── tree.py
│   │   │   └── val.py
│   │   ├── r_tree
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   └── tree.py
│   │   ├── sig_diff
│   │   │   ├── __init__.py
│   │   │   ├── sig.py
│   │   │   ├── sig_base.py
│   │   │   └── sig_val.py
│   │   ├── util.py
│   │   ├── util_c.c
│   │   ├── util_c.cpython-37m-darwin.so
│   │   └── util_c.pyx
│   ├── causal_tree_learn.py
│   ├── causal_tree_match.py
│   ├── pehe_tree.py
│   ├── sig_diff_tree.py
│   └── tree.py
├── LICENSE
├── README.md
├── binary_example.py
├── build
│   ├── lib.macosx-12.6-arm64-cpython-310
│   │   └── CTL
│   │       ├── __init__.py
│   │       ├── _tree.py
│   │       ├── causal_learn_forest.py
│   │       ├── causal_tree
│   │       │   ├── __init__.py
│   │       │   ├── ct.py
│   │       │   ├── ctl
│   │       │   │   ├── __init__.py
│   │       │   │   ├── adaptive.py
│   │       │   │   ├── binary_ctl.py
│   │       │   │   ├── ctl_base.py
│   │       │   │   ├── ctl_honest.py
│   │       │   │   ├── ctl_val_honest.py
│   │       │   │   └── honest.py
│   │       │   ├── ctl_match
│   │       │   │   ├── __init__.py
│   │       │   │   ├── binary_ctl.py
│   │       │   │   └── ctl_base.py
│   │       │   ├── ctl_trigger
│   │       │   │   ├── __init__.py
│   │       │   │   ├── adaptive_trigger.py
│   │       │   │   ├── ctl_base_trigger.py
│   │       │   │   ├── ctl_honest_trigger.py
│   │       │   │   ├── ctl_val_honest_trigger.py
│   │       │   │   ├── honest_trigger.py
│   │       │   │   └── trigger_ctl.py
│   │       │   ├── nn_pehe
│   │       │   │   ├── __init__.py
│   │       │   │   ├── balance_split.py
│   │       │   │   ├── base.py
│   │       │   │   ├── honest.py
│   │       │   │   ├── tree.py
│   │       │   │   └── val.py
│   │       │   ├── r_tree
│   │       │   │   ├── __init__.py
│   │       │   │   ├── base.py
│   │       │   │   └── tree.py
│   │       │   ├── sig_diff
│   │       │   │   ├── __init__.py
│   │       │   │   ├── sig.py
│   │       │   │   ├── sig_base.py
│   │       │   │   └── sig_val.py
│   │       │   ├── util.py
│   │       │   ├── util_c.c
│   │       │   ├── util_c.cpython-310-darwin.so
│   │       │   └── util_c.pyx
│   │       ├── causal_tree_learn.py
│   │       ├── causal_tree_match.py
│   │       ├── pehe_tree.py
│   │       ├── sig_diff_tree.py
│   │       └── tree.py
│   └── temp.macosx-12.6-arm64-cpython-310
│       └── CTL
│           └── causal_tree
│               └── util_c.o
├── causal_tree_learn.egg-info
│   ├── PKG-INFO
│   ├── SOURCES.txt
│   ├── dependency_links.txt
│   ├── requires.txt
│   └── top_level.txt
├── data
│   └── asthma.txt
├── dist
│   ├── causal-tree-learn-2.43.tar.gz
│   └── causal_tree_learn-2.43-cp310-cp310-macosx_12_0_arm64.whl
├── poetry.lock
├── pyproject.toml
├── setup.py
└── trigger_example.py

/.gitignore:
--------------------------------------------------------------------------------
1 | output/
2 | __pycache__/
3 | .DS_store
4 | .idea/
5 | # build/
6 | # causal_tree_learn.egg-info
7 | # dist
8 | /backup/
9 | notebooks/
10 | 
11 | test.py
12 | test_script.py
13 | test_cython_trigger.py
14 | test_cython_binary.py
15 | test_cython.py
16 | binary_example_random.py
17 | test_trigger.py
18 | notebooks/data_generation.py
19 | notebooks/2020-04-28 - Vectorize with Matching.ipynb
20 | test2.py
21 | test3.py
22 | 
23 | .ipynb_checkpoints
24 | 
25 | test.*
26 | /dist (1)/
27 | 
28 | notes.txt
29 | .venv
--------------------------------------------------------------------------------
/CTL/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/__init__.py
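A minimal usage sketch for the CausalTreeLearnForest class defined in causal_learn_forest.py just below; the synthetic data, variable names, and parameter values here are illustrative assumptions, not part of the repo:

import numpy as np
from CTL.causal_learn_forest import CausalTreeLearnForest

rng = np.random.RandomState(0)
x = rng.randn(500, 8)                      # 500 samples, 8 features
t = rng.randint(0, 2, 500)                 # binary treatment indicator
y = x[:, 0] + t * (x[:, 1] > 0) + 0.1 * rng.randn(500)

forest = CausalTreeLearnForest(num_trees=10, bootstrap=True, max_features="sqrt")
forest.fit(x, y, t)
effects = forest.predict(x)                # per-sample effect, averaged over trees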
--------------------------------------------------------------------------------
/CTL/causal_learn_forest.py:
--------------------------------------------------------------------------------
1 | from CTL.causal_tree_learn import CausalTree
2 | import numpy as np
3 | 
4 | 
5 | class CausalTreeLearnForest:
6 | 
7 |     def __init__(self, num_trees=10, bootstrap=True, max_samples=None, max_features="auto", max_depth=-1,
8 |                  val_honest=False, honest=False, min_size=2, split_size=0.5, weight=0.5, feature_batch_size=None,
9 |                  seed=724):
10 | 
11 |         tree_params = {
12 |             "weight": weight,
13 |             "split_size": split_size,
14 |             "max_depth": max_depth,
15 |             "seed": seed,
16 |             "min_size": min_size,
17 |             "val_honest": val_honest,
18 |             "honest": honest,
19 |             "feature_batch_size": feature_batch_size,
20 |         }
21 | 
22 |         self.num_trees = num_trees
23 |         self.bootstrap = bootstrap
24 |         self.max_samples = max_samples
25 |         self.max_features = max_features
26 |         self.max_depth = max_depth
27 | 
28 |         self.trees = tuple(CausalTree(**tree_params) for _ in range(num_trees))
29 | 
30 |     def fit(self, x, y, t):
31 |         x = x.astype(float)
32 |         y = y.astype(float)
33 |         t = t.astype(float)
34 | 
35 |         for tree in self.trees:
36 |             example_samples, feature_samples = self._sample(x)
37 | 
38 |             sample_x = x[np.ix_(example_samples, feature_samples)]
39 |             sample_y = y[example_samples]
40 |             sample_t = t[example_samples]
41 |             tree.feature_samples = feature_samples  # remember which columns this tree was fit on
42 |             tree.fit(sample_x, sample_y, sample_t)
43 | 
44 |     def predict(self, x):
45 |         predictions = np.zeros((self.num_trees, x.shape[0]))
46 |         for i, tree in enumerate(self.trees):
47 |             predictions[i] = tree.predict(x[:, tree.feature_samples])  # subset to the columns the tree saw
48 | 
49 |         return np.mean(predictions, axis=0)
50 | 
51 |     def _sample(self, x):
52 |         total_examples = x.shape[0]
53 |         total_features = x.shape[1]
54 | 
55 |         example_samples = self._sample_examples(total_examples)
56 |         feature_samples = self._feature_sample(total_features)
57 | 
58 |         return example_samples, feature_samples
59 | 
60 |     def _sample_examples(self, total_examples):
61 |         if self.bootstrap:
62 |             if self.max_samples:
63 |                 if isinstance(self.max_samples, float):
64 |                     example_samples = np.random.choice(np.arange(0, total_examples),
65 |                                                        size=int(self.max_samples * total_examples))
66 |                 elif isinstance(self.max_samples, int):
67 |                     example_samples = np.random.choice(np.arange(0, total_examples), size=self.max_samples)
68 |                 else:
69 |                     example_samples = np.random.choice(np.arange(0, total_examples), size=total_examples)
70 |             else:
71 |                 example_samples = np.random.choice(np.arange(0, total_examples), size=total_examples)
72 |         else:
73 |             example_samples = np.arange(0, total_examples)
74 | 
75 |         return example_samples
76 | 
77 |     def _feature_sample(self, total_features):
78 |         num_features = self._feature_sample_size(total_features)
79 |         feature_samples = np.random.permutation(total_features)[:num_features]
80 |         return feature_samples
81 | 
82 |     def _feature_sample_size(self, total_features):
83 |         num_features = total_features
84 |         if self.max_features == "auto" or self.max_features == "sqrt":
85 |             num_features = int(np.sqrt(num_features))
86 |         elif isinstance(self.max_features, int):
87 |             num_features = self.max_features
88 |         elif isinstance(self.max_features, float):
89 |             num_features = int(self.max_features * total_features)
90 |         return num_features
91 | 
--------------------------------------------------------------------------------
/CTL/causal_tree/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/ct.py: -------------------------------------------------------------------------------- 1 | from CTL.tree import * 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class CTNode(ABC): 6 | 7 | def __init__(self): 8 | super().__init__() 9 | 10 | 11 | class CausalTree(ABC): 12 | 13 | def __init__(self): 14 | super().__init__() 15 | 16 | # the learning objective 17 | self.obj = 0.0 18 | # Haven't implemented "mse" yet 19 | self.mse = 0.0 20 | 21 | # tree properties 22 | self.tree_depth = 0 23 | self.num_leaves = 0 24 | 25 | @abstractmethod 26 | def fit(self, x, y, t): 27 | pass 28 | 29 | @abstractmethod 30 | def predict(self, x): 31 | pass 32 | -------------------------------------------------------------------------------- /CTL/causal_tree/ctl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/ctl/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/ctl/adaptive.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class AdaptiveNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class AdaptiveTree(CTLearn): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = AdaptiveNode() 21 | 22 | def adaptive_eval(self, train_y, train_t): 23 | total_train = train_y.shape[0] 24 | 25 | train_effect = ace(train_y, train_t) 26 | 27 | train_mse = total_train * (train_effect ** 2) 28 | 29 | obj = train_mse 30 | mse = total_train * (train_effect ** 2) 31 | 32 | return obj, mse 33 | 34 | def fit(self, x, y, t): 35 | if x.shape[0] == 0: 36 | return 0 37 | 38 | # ---------------------------------------------------------------- 39 | # Seed 40 | # ---------------------------------------------------------------- 41 | np.random.seed(self.seed) 42 | 43 | # ---------------------------------------------------------------- 44 | # Verbosity? 
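        # (illustrative) adaptive_eval above scores a node as N * tau^2, so a
        # candidate split whose children have 60 and 40 samples with estimated
        # effects 0.5 and -0.2 scores 60 * 0.25 + 40 * 0.04 = 16.6; the gain
        # computed in _fit is that sum minus the parent's own score.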
45 | # ---------------------------------------------------------------- 46 | 47 | # ---------------------------------------------------------------- 48 | # Split data 49 | # ---------------------------------------------------------------- 50 | 51 | self.root.num_samples = y.shape[0] 52 | # ---------------------------------------------------------------- 53 | # effect and pvals 54 | # ---------------------------------------------------------------- 55 | effect = tau_squared(y, t) 56 | p_val = get_pval(y, t) 57 | self.root.effect = effect 58 | self.root.p_val = p_val 59 | 60 | # ---------------------------------------------------------------- 61 | # Not sure if i should eval in root or not 62 | # ---------------------------------------------------------------- 63 | node_eval, mse = self.adaptive_eval(y, t) 64 | self.root.obj = node_eval 65 | 66 | # ---------------------------------------------------------------- 67 | # Add control/treatment means 68 | # ---------------------------------------------------------------- 69 | self.root.control_mean = np.mean(y[t == 0]) 70 | self.root.treatment_mean = np.mean(y[t == 1]) 71 | 72 | self.root.num_samples = x.shape[0] 73 | 74 | self._fit(self.root, x, y, t) 75 | 76 | def _fit(self, node: AdaptiveNode, train_x, train_y, train_t): 77 | 78 | if train_x.shape[0] == 0: 79 | return node 80 | 81 | if node.node_depth > self.tree_depth: 82 | self.tree_depth = node.node_depth 83 | 84 | if self.max_depth == self.tree_depth: 85 | if node.effect > self.max_effect: 86 | self.max_effect = node.effect 87 | if node.effect < self.min_effect: 88 | self.min_effect = node.effect 89 | self.num_leaves += 1 90 | node.leaf_num = self.num_leaves 91 | node.is_leaf = True 92 | return node 93 | 94 | best_gain = 0.0 95 | best_attributes = [] 96 | best_tb_obj, best_fb_obj = (0.0, 0.0) 97 | 98 | column_count = train_x.shape[1] 99 | for col in range(0, column_count): 100 | unique_vals = np.unique(train_x[:, col]) 101 | 102 | if self.max_values is not None: 103 | if self.max_values < 1: 104 | idx = np.round(np.linspace( 105 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 106 | unique_vals = unique_vals[idx] 107 | else: 108 | idx = np.round(np.linspace( 109 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 110 | unique_vals = unique_vals[idx] 111 | 112 | for value in unique_vals: 113 | 114 | # check training data size 115 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 116 | = divide_set(train_x, train_y, train_t, col, value) 117 | check1 = check_min_size(self.min_size, train_t1) 118 | check2 = check_min_size(self.min_size, train_t2) 119 | if check1 or check2: 120 | continue 121 | 122 | tb_eval, tb_mse = self.adaptive_eval(train_y1, train_t1) 123 | fb_eval, fb_mse = self.adaptive_eval(train_y2, train_t2) 124 | 125 | split_eval = (tb_eval + fb_eval) 126 | gain = -node.obj + split_eval 127 | 128 | if gain > best_gain: 129 | best_gain = gain 130 | best_attributes = [col, value] 131 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 132 | 133 | if best_gain > 0: 134 | node.col = best_attributes[0] 135 | node.value = best_attributes[1] 136 | 137 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 138 | = divide_set(train_x, train_y, train_t, node.col, node.value) 139 | 140 | y1 = train_y1 141 | y2 = train_y2 142 | t1 = train_t1 143 | t2 = train_t2 144 | 145 | best_tb_effect = ace(y1, t1) 146 | best_fb_effect = ace(y2, t2) 147 | tb_p_val = get_pval(y1, t1) 148 | fb_p_val = get_pval(y2, t2) 149 | 150 | self.obj = self.obj - 
node.obj + best_tb_obj + best_fb_obj 151 | 152 | # ---------------------------------------------------------------- 153 | # Ignore "mse" here, come back to it later? 154 | # ---------------------------------------------------------------- 155 | 156 | tb = AdaptiveNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 157 | node_depth=node.node_depth + 1, 158 | num_samples=y1.shape[0]) 159 | fb = AdaptiveNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 160 | node_depth=node.node_depth + 1, 161 | num_samples=y2.shape[0]) 162 | 163 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 164 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 165 | 166 | if node.effect > self.max_effect: 167 | self.max_effect = node.effect 168 | if node.effect < self.min_effect: 169 | self.min_effect = node.effect 170 | 171 | return node 172 | 173 | else: 174 | if node.effect > self.max_effect: 175 | self.max_effect = node.effect 176 | if node.effect < self.min_effect: 177 | self.min_effect = node.effect 178 | 179 | self.num_leaves += 1 180 | node.leaf_num = self.num_leaves 181 | node.is_leaf = True 182 | return node 183 | -------------------------------------------------------------------------------- /CTL/causal_tree/ctl/ctl_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class BaseCausalTreeLearnNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class CausalTreeLearnBase(CTLearn): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = BaseCausalTreeLearnNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | # ---------------------------------------------------------------- 32 | # Verbosity? 
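        # (note) unlike the adaptive tree above, this CTL base tree holds out a
        # validation fraction (self.val_split, the constructor's split_size)
        # via train_test_split below, and _eval scores each candidate split on
        # both halves (see binary_ctl.py, which is not shown here).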
33 | # ---------------------------------------------------------------- 34 | 35 | # ---------------------------------------------------------------- 36 | # Split data 37 | # ---------------------------------------------------------------- 38 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 39 | test_size=self.val_split) 40 | self.root.num_samples = train_y.shape[0] 41 | # ---------------------------------------------------------------- 42 | # effect and pvals 43 | # ---------------------------------------------------------------- 44 | effect = tau_squared(y, t) 45 | p_val = get_pval(y, t) 46 | self.root.effect = effect 47 | self.root.p_val = p_val 48 | 49 | # ---------------------------------------------------------------- 50 | # Not sure if i should eval in root or not 51 | # ---------------------------------------------------------------- 52 | node_eval, mse = self._eval(train_y, train_t, val_y, val_t) 53 | self.root.obj = node_eval 54 | 55 | # ---------------------------------------------------------------- 56 | # Add control/treatment means 57 | # ---------------------------------------------------------------- 58 | self.root.control_mean = np.mean(y[t == 0]) 59 | self.root.treatment_mean = np.mean(y[t == 1]) 60 | 61 | self.root.num_samples = x.shape[0] 62 | 63 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 64 | 65 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t, val_x, val_y, val_t): 66 | 67 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 68 | return node 69 | 70 | if node.node_depth > self.tree_depth: 71 | self.tree_depth = node.node_depth 72 | 73 | if self.max_depth == self.tree_depth: 74 | if node.effect > self.max_effect: 75 | self.max_effect = node.effect 76 | if node.effect < self.min_effect: 77 | self.min_effect = node.effect 78 | self.num_leaves += 1 79 | node.leaf_num = self.num_leaves 80 | node.is_leaf = True 81 | return node 82 | 83 | best_gain = 0.0 84 | best_attributes = [] 85 | best_tb_obj, best_fb_obj = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | if self.max_values is not None: 92 | if self.max_values < 1: 93 | idx = np.round(np.linspace( 94 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 95 | unique_vals = unique_vals[idx] 96 | else: 97 | idx = np.round(np.linspace( 98 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 99 | unique_vals = unique_vals[idx] 100 | 101 | # using the faster evaluation with vector/matrix calculations 102 | try: 103 | if self.feature_batch_size is None: 104 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, val_y, 105 | val_t, 106 | unique_vals, col) 107 | gain = -node.obj + split_obj 108 | if gain > best_gain: 109 | best_gain = gain 110 | best_attributes = [col, value] 111 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 112 | else: 113 | 114 | for x in batch(unique_vals, self.feature_batch_size): 115 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, 116 | val_y, val_t, x, col) 117 | 118 | gain = -node.obj + split_obj 119 | if gain > best_gain: 120 | best_gain = gain 121 | best_attributes = [col, value] 122 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 123 | # if that fails (due to memory maybe?) 
then use the old calculation 124 | except: 125 | for value in unique_vals: 126 | 127 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 128 | = divide_set(val_x, val_y, val_t, col, value) 129 | 130 | # check validation set size 131 | val_size = self.val_split * self.min_size if self.val_split * self.min_size > 2 else 2 132 | if check_min_size(val_size, val_t1) or check_min_size(val_size, val_t2): 133 | continue 134 | 135 | # check training data size 136 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 137 | = divide_set(train_x, train_y, train_t, col, value) 138 | check1 = check_min_size(self.min_size, train_t1) 139 | check2 = check_min_size(self.min_size, train_t2) 140 | if check1 or check2: 141 | continue 142 | 143 | tb_eval, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 144 | fb_eval, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 145 | 146 | split_eval = (tb_eval + fb_eval) 147 | gain = -node.obj + split_eval 148 | 149 | if gain > best_gain: 150 | best_gain = gain 151 | best_attributes = [col, value] 152 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 153 | 154 | if best_gain > 0: 155 | node.col = best_attributes[0] 156 | node.value = best_attributes[1] 157 | 158 | # print(node.col) 159 | 160 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 161 | = divide_set(train_x, train_y, train_t, node.col, node.value) 162 | 163 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 164 | = divide_set(val_x, val_y, val_t, node.col, node.value) 165 | 166 | y1 = np.concatenate((train_y1, val_y1)) 167 | y2 = np.concatenate((train_y2, val_y2)) 168 | t1 = np.concatenate((train_t1, val_t1)) 169 | t2 = np.concatenate((train_t2, val_t2)) 170 | 171 | best_tb_effect = ace(y1, t1) 172 | best_fb_effect = ace(y2, t2) 173 | tb_p_val = get_pval(y1, t1) 174 | fb_p_val = get_pval(y2, t2) 175 | 176 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 177 | 178 | # ---------------------------------------------------------------- 179 | # Ignore "mse" here, come back to it later? 
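            # (note) self.obj above is maintained incrementally: each accepted
            # split removes the parent's score from the running total and adds
            # the two child scores in its place.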
180 | # ---------------------------------------------------------------- 181 | 182 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 183 | node_depth=node.node_depth + 1, 184 | num_samples=y1.shape[0]) 185 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 186 | node_depth=node.node_depth + 1, 187 | num_samples=y2.shape[0]) 188 | 189 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 190 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 191 | 192 | if node.effect > self.max_effect: 193 | self.max_effect = node.effect 194 | if node.effect < self.min_effect: 195 | self.min_effect = node.effect 196 | 197 | return node 198 | 199 | else: 200 | if node.effect > self.max_effect: 201 | self.max_effect = node.effect 202 | if node.effect < self.min_effect: 203 | self.min_effect = node.effect 204 | 205 | self.num_leaves += 1 206 | node.leaf_num = self.num_leaves 207 | node.is_leaf = True 208 | return node 209 | -------------------------------------------------------------------------------- /CTL/causal_tree/ctl_match/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/ctl_match/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/ctl_match/ctl_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_match.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class BaseCausalTreeLearnNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class CTLMatchBase(CTLMatch): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = BaseCausalTreeLearnNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | # ---------------------------------------------------------------- 32 | # Verbosity? 
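        # (note) this matching variant differs from ctl/ctl_base.py mainly in
        # that it fits self.normalizer on the training features (see the
        # "Split data" block below), presumably to put features on a comparable
        # scale for the matching step in ctl_match/binary_ctl.py (not shown here).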
33 | # ---------------------------------------------------------------- 34 | 35 | # ---------------------------------------------------------------- 36 | # Split data 37 | # ---------------------------------------------------------------- 38 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 39 | test_size=self.val_split) 40 | 41 | self.normalizer.fit(train_x) 42 | 43 | self.root.num_samples = y.shape[0] 44 | # ---------------------------------------------------------------- 45 | # effect and pvals 46 | # ---------------------------------------------------------------- 47 | effect = tau_squared(y, t) 48 | p_val = get_pval(y, t) 49 | self.root.effect = effect 50 | self.root.p_val = p_val 51 | 52 | # ---------------------------------------------------------------- 53 | # Not sure if i should eval in root or not 54 | # ---------------------------------------------------------------- 55 | node_eval, mse = self._eval(train_y, train_t, val_y, val_t) 56 | self.root.obj = node_eval 57 | 58 | # ---------------------------------------------------------------- 59 | # Add control/treatment means 60 | # ---------------------------------------------------------------- 61 | self.root.control_mean = np.mean(y[t == 0]) 62 | self.root.treatment_mean = np.mean(y[t == 1]) 63 | 64 | self.root.num_samples = x.shape[0] 65 | 66 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 67 | 68 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t, val_x, val_y, val_t): 69 | 70 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 71 | node.is_leaf = True 72 | return node 73 | 74 | if node.node_depth > self.tree_depth: 75 | self.tree_depth = node.node_depth 76 | 77 | if self.max_depth == self.tree_depth: 78 | self.num_leaves += 1 79 | node.leaf_num = self.num_leaves 80 | node.is_leaf = True 81 | return node 82 | 83 | best_gain = 0.0 84 | best_attributes = [] 85 | best_tb_obj, best_fb_obj = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | # ---------------------------------------------------------------- 92 | # TODO: Max values stuff 93 | # ---------------------------------------------------------------- 94 | 95 | # using the faster evaluation with vector/matrix calculations 96 | try: 97 | if self.feature_batch_size is None: 98 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, val_y, 99 | val_t, 100 | unique_vals, col) 101 | gain = -node.obj + split_obj 102 | if gain > best_gain: 103 | best_gain = gain 104 | best_attributes = [col, value] 105 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 106 | else: 107 | 108 | for x in batch(unique_vals, self.feature_batch_size): 109 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, 110 | val_y, val_t, x, col) 111 | 112 | gain = -node.obj + split_obj 113 | if gain > best_gain: 114 | best_gain = gain 115 | best_attributes = [col, value] 116 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 117 | # if that fails (due to memory maybe?) 
then use the old calculation 118 | except: 119 | for value in unique_vals: 120 | 121 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 122 | = divide_set(val_x, val_y, val_t, col, value) 123 | 124 | # check validation set size 125 | val_size = self.val_split * self.min_size if self.val_split * self.min_size > 2 else 2 126 | if check_min_size(val_size, val_t1) or check_min_size(val_size, val_t2): 127 | continue 128 | 129 | # check training data size 130 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 131 | = divide_set(train_x, train_y, train_t, col, value) 132 | check1 = check_min_size(self.min_size, train_t1) 133 | check2 = check_min_size(self.min_size, train_t2) 134 | if check1 or check2: 135 | continue 136 | 137 | tb_eval, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 138 | fb_eval, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 139 | 140 | split_eval = (tb_eval + fb_eval) 141 | gain = -node.obj + split_eval 142 | 143 | if gain > best_gain: 144 | best_gain = gain 145 | best_attributes = [col, value] 146 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 147 | 148 | if best_gain > 0: 149 | node.col = best_attributes[0] 150 | node.value = best_attributes[1] 151 | 152 | # print(node.col) 153 | 154 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 155 | = divide_set(train_x, train_y, train_t, node.col, node.value) 156 | 157 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 158 | = divide_set(val_x, val_y, val_t, node.col, node.value) 159 | 160 | y1 = np.concatenate((train_y1, val_y1)) 161 | y2 = np.concatenate((train_y2, val_y2)) 162 | t1 = np.concatenate((train_t1, val_t1)) 163 | t2 = np.concatenate((train_t2, val_t2)) 164 | 165 | best_tb_effect = ace(y1, t1) 166 | best_fb_effect = ace(y2, t2) 167 | tb_p_val = get_pval(y1, t1) 168 | fb_p_val = get_pval(y2, t2) 169 | 170 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 171 | 172 | # ---------------------------------------------------------------- 173 | # Ignore "mse" here, come back to it later? 
174 | # ---------------------------------------------------------------- 175 | 176 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 177 | node_depth=node.node_depth + 1, 178 | num_samples=y1.shape[0]) 179 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 180 | node_depth=node.node_depth + 1, 181 | num_samples=y2.shape[0]) 182 | 183 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 184 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 185 | 186 | if node.effect > self.max_effect: 187 | self.max_effect = node.effect 188 | if node.effect < self.min_effect: 189 | self.min_effect = node.effect 190 | 191 | return node 192 | 193 | else: 194 | if node.effect > self.max_effect: 195 | self.max_effect = node.effect 196 | if node.effect < self.min_effect: 197 | self.min_effect = node.effect 198 | 199 | self.num_leaves += 1 200 | node.leaf_num = self.num_leaves 201 | node.is_leaf = True 202 | return node 203 | -------------------------------------------------------------------------------- /CTL/causal_tree/ctl_trigger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/ctl_trigger/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/ctl_trigger/adaptive_trigger.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_trigger.trigger_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class AdaptiveTriggerNode(TriggerNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class AdaptiveTriggerTree(TriggerTree): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = AdaptiveTriggerNode() 21 | 22 | def adaptive_eval(self, train_y, train_t): 23 | 24 | total_train = train_y.shape[0] 25 | return_val = (-np.inf, -np.inf, -np.inf) 26 | 27 | if total_train == 0: 28 | return return_val 29 | 30 | train_effect, best_trigger = tau_squared_trigger(train_y, train_t, self.min_size, self.quartile) 31 | 32 | if train_effect <= -np.inf: 33 | return return_val 34 | 35 | train_err = train_effect ** 2 36 | 37 | train_mse = total_train * train_err 38 | obj = train_mse 39 | 40 | best_obj = obj 41 | best_mse = train_err 42 | 43 | return best_obj, best_trigger, best_mse 44 | 45 | def fit(self, x, y, t): 46 | if x.shape[0] == 0: 47 | return 0 48 | 49 | # ---------------------------------------------------------------- 50 | # Seed 51 | # ---------------------------------------------------------------- 52 | np.random.seed(self.seed) 53 | 54 | # ---------------------------------------------------------------- 55 | # Verbosity? 
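        # (note) in the trigger trees, t is a continuous exposure rather than a
        # 0/1 indicator: tau_squared_trigger below searches candidate thresholds
        # ("triggers"), and the best threshold is stored per node as node.trigger.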
56 | # ---------------------------------------------------------------- 57 | 58 | # ---------------------------------------------------------------- 59 | # Split data 60 | # ---------------------------------------------------------------- 61 | 62 | self.root.num_samples = y.shape[0] 63 | # ---------------------------------------------------------------- 64 | # effect and pvals 65 | # ---------------------------------------------------------------- 66 | effect, trigger = tau_squared_trigger(y, t, self.min_size, self.quartile) 67 | p_val = get_pval_trigger(y, t, trigger) 68 | self.root.effect = effect 69 | self.root.p_val = p_val 70 | self.root.trigger = trigger 71 | 72 | # ---------------------------------------------------------------- 73 | # Not sure if i should eval in root or not 74 | # ---------------------------------------------------------------- 75 | node_eval, trigger, mse = self.adaptive_eval(y, t) 76 | self.root.obj = node_eval 77 | 78 | # ---------------------------------------------------------------- 79 | # Add control/treatment means 80 | # ---------------------------------------------------------------- 81 | self.root.control_mean = np.mean(y[t >= trigger]) 82 | self.root.treatment_mean = np.mean(y[t < trigger]) 83 | 84 | self.root.num_samples = x.shape[0] 85 | 86 | self._fit(self.root, x, y, t) 87 | 88 | def _fit(self, node: AdaptiveTriggerNode, train_x, train_y, train_t): 89 | 90 | if train_x.shape[0] == 0: 91 | return node 92 | 93 | if node.node_depth > self.tree_depth: 94 | self.tree_depth = node.node_depth 95 | 96 | if self.max_depth == self.tree_depth: 97 | if node.effect > self.max_effect: 98 | self.max_effect = node.effect 99 | if node.effect < self.min_effect: 100 | self.min_effect = node.effect 101 | self.num_leaves += 1 102 | node.leaf_num = self.num_leaves 103 | node.is_leaf = True 104 | return node 105 | 106 | best_gain = 0.0 107 | best_attributes = [] 108 | best_tb_obj, best_fb_obj = (0.0, 0.0) 109 | best_tb_trigger, best_fb_trigger = (0.0, 0.0) 110 | 111 | column_count = train_x.shape[1] 112 | for col in range(0, column_count): 113 | unique_vals = np.unique(train_x[:, col]) 114 | 115 | if self.max_values is not None: 116 | if self.max_values < 1: 117 | idx = np.round(np.linspace(0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 118 | unique_vals = unique_vals[idx] 119 | else: 120 | idx = np.round(np.linspace( 121 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 122 | unique_vals = unique_vals[idx] 123 | 124 | for value in unique_vals: 125 | 126 | # check training data size 127 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 128 | = divide_set(train_x, train_y, train_t, col, value) 129 | check1 = check_min_size(self.min_size, train_t1) 130 | check2 = check_min_size(self.min_size, train_t2) 131 | if check1 or check2: 132 | continue 133 | 134 | tb_eval, tb_trigger, tb_mse = self.adaptive_eval(train_y1, train_t1) 135 | fb_eval, fb_trigger, fb_mse = self.adaptive_eval(train_y2, train_t2) 136 | 137 | split_eval = (tb_eval + fb_eval) 138 | gain = -node.obj + split_eval 139 | 140 | if gain > best_gain: 141 | best_gain = gain 142 | best_attributes = [col, value] 143 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 144 | best_tb_trigger, best_fb_trigger = (tb_trigger, fb_trigger) 145 | 146 | if best_gain > 0: 147 | node.col = best_attributes[0] 148 | node.value = best_attributes[1] 149 | 150 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 151 | = divide_set(train_x, train_y, train_t, node.col, node.value) 
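            # (note) this adaptive variant re-estimates leaf effects from the
            # training split alone; ctl_base_trigger.py below instead
            # concatenates the training and validation halves before computing
            # leaf effects and p-values.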
152 | 
153 |             y1 = train_y1
154 |             y2 = train_y2
155 |             t1 = train_t1
156 |             t2 = train_t2
157 | 
158 |             best_tb_effect = ace_trigger(y1, t1, best_tb_trigger)
159 |             best_fb_effect = ace_trigger(y2, t2, best_fb_trigger)
160 |             tb_p_val = get_pval_trigger(y1, t1, best_tb_trigger)
161 |             fb_p_val = get_pval_trigger(y2, t2, best_fb_trigger)
162 | 
163 |             self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj
164 | 
165 |             # ----------------------------------------------------------------
166 |             # Ignore "mse" here, come back to it later?
167 |             # ----------------------------------------------------------------
168 | 
169 |             tb = AdaptiveTriggerNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val,
170 |                                      node_depth=node.node_depth + 1,
171 |                                      num_samples=y1.shape[0], trigger=best_tb_trigger)
172 |             fb = AdaptiveTriggerNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val,
173 |                                      node_depth=node.node_depth + 1,
174 |                                      num_samples=y2.shape[0], trigger=best_fb_trigger)
175 | 
176 |             node.true_branch = self._fit(tb, train_x1, train_y1, train_t1)
177 |             node.false_branch = self._fit(fb, train_x2, train_y2, train_t2)
178 | 
179 |             if node.effect > self.max_effect:
180 |                 self.max_effect = node.effect
181 |             if node.effect < self.min_effect:
182 |                 self.min_effect = node.effect
183 | 
184 |             return node
185 | 
186 |         else:
187 |             if node.effect > self.max_effect:
188 |                 self.max_effect = node.effect
189 |             if node.effect < self.min_effect:
190 |                 self.min_effect = node.effect
191 | 
192 |             self.num_leaves += 1
193 |             node.leaf_num = self.num_leaves
194 |             node.is_leaf = True
195 |             return node
196 | 
--------------------------------------------------------------------------------
/CTL/causal_tree/ctl_trigger/ctl_base_trigger.py:
--------------------------------------------------------------------------------
1 | from CTL.causal_tree.ctl_trigger.trigger_ctl import *
2 | from sklearn.model_selection import train_test_split
3 | 
4 | 
5 | class TriggerBaseNode(TriggerNode):
6 | 
7 |     def __init__(self, **kwargs):
8 |         super().__init__(**kwargs)
9 | 
10 | 
11 | # ----------------------------------------------------------------
12 | # Base causal tree (ctl, base objective)
13 | # ----------------------------------------------------------------
14 | class TriggerTreeBase(TriggerTree):
15 | 
16 |     def __init__(self, **kwargs):
17 |         super().__init__(**kwargs)
18 |         self.root = TriggerBaseNode()
19 | 
20 |     def fit(self, x, y, t):
21 |         if x.shape[0] == 0:
22 |             return 0
23 | 
24 |         # ----------------------------------------------------------------
25 |         # Seed
26 |         # ----------------------------------------------------------------
27 |         np.random.seed(self.seed)
28 | 
29 |         # ----------------------------------------------------------------
30 |         # Verbosity?
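        # (note) _eval for trigger trees comes from trigger_ctl.py (not shown)
        # and returns a triple (objective, chosen trigger, mse), unpacked at
        # the root evaluation below, whereas the binary trees' _eval returns
        # the pair (objective, mse).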
31 | # ---------------------------------------------------------------- 32 | 33 | # ---------------------------------------------------------------- 34 | # Split data 35 | # ---------------------------------------------------------------- 36 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 37 | test_size=self.val_split) 38 | self.root.num_samples = y.shape[0] 39 | # ---------------------------------------------------------------- 40 | # effect and pvals 41 | # ---------------------------------------------------------------- 42 | effect, trigger = tau_squared_trigger(y, t, self.min_size, self.quartile) 43 | p_val = get_pval_trigger(y, t, trigger) 44 | self.root.effect = effect 45 | self.root.p_val = p_val 46 | self.root.trigger = trigger 47 | 48 | # ---------------------------------------------------------------- 49 | # Not sure if i should eval in root or not 50 | # ---------------------------------------------------------------- 51 | node_eval, trigger, mse = self._eval(train_y, train_t, val_y, val_t) 52 | self.root.obj = node_eval 53 | 54 | # ---------------------------------------------------------------- 55 | # Add control/treatment means 56 | # ---------------------------------------------------------------- 57 | self.root.control_mean = np.mean(y[t >= trigger]) 58 | self.root.treatment_mean = np.mean(y[t < trigger]) 59 | 60 | self.root.num_samples = x.shape[0] 61 | 62 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 63 | 64 | def _fit(self, node: TriggerBaseNode, train_x, train_y, train_t, val_x, val_y, val_t): 65 | 66 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 67 | return node 68 | 69 | if node.node_depth > self.tree_depth: 70 | self.tree_depth = node.node_depth 71 | 72 | if self.max_depth == self.tree_depth: 73 | if node.effect > self.max_effect: 74 | self.max_effect = node.effect 75 | if node.effect < self.min_effect: 76 | self.min_effect = node.effect 77 | self.num_leaves += 1 78 | node.leaf_num = self.num_leaves 79 | node.is_leaf = True 80 | return node 81 | 82 | best_gain = 0.0 83 | best_attributes = [] 84 | best_tb_obj, best_fb_obj = (0.0, 0.0) 85 | best_tb_trigger, best_fb_trigger = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | if self.max_values is not None: 92 | if self.max_values < 1: 93 | idx = np.round(np.linspace(0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 94 | unique_vals = unique_vals[idx] 95 | else: 96 | idx = np.round(np.linspace( 97 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 98 | unique_vals = unique_vals[idx] 99 | 100 | for value in unique_vals: 101 | 102 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 103 | = divide_set(val_x, val_y, val_t, col, value) 104 | 105 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 106 | = divide_set(train_x, train_y, train_t, col, value) 107 | 108 | tb_eval, tb_trigger, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 109 | fb_eval, fb_trigger, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 110 | 111 | split_eval = (tb_eval + fb_eval) 112 | gain = -node.obj + split_eval 113 | 114 | if gain > best_gain: 115 | best_gain = gain 116 | best_attributes = [col, value] 117 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 118 | best_tb_trigger, best_fb_trigger = (tb_trigger, fb_trigger) 119 | 120 | if best_gain > 0: 121 | node.col = best_attributes[0] 122 | node.value 
= best_attributes[1] 123 | 124 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 125 | = divide_set(train_x, train_y, train_t, node.col, node.value) 126 | 127 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 128 | = divide_set(val_x, val_y, val_t, node.col, node.value) 129 | 130 | y1 = np.concatenate((train_y1, val_y1)) 131 | y2 = np.concatenate((train_y2, val_y2)) 132 | t1 = np.concatenate((train_t1, val_t1)) 133 | t2 = np.concatenate((train_t2, val_t2)) 134 | 135 | best_tb_effect = ace_trigger(y1, t1, best_tb_trigger) 136 | best_fb_effect = ace_trigger(y2, t2, best_fb_trigger) 137 | tb_p_val = get_pval_trigger(y1, t1, best_tb_trigger) 138 | fb_p_val = get_pval_trigger(y2, t2, best_fb_trigger) 139 | 140 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 141 | 142 | # ---------------------------------------------------------------- 143 | # Ignore "mse" here, come back to it later? 144 | # ---------------------------------------------------------------- 145 | 146 | tb = TriggerBaseNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 147 | node_depth=node.node_depth + 1, 148 | num_samples=y1.shape[0], trigger=best_tb_trigger) 149 | fb = TriggerBaseNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 150 | node_depth=node.node_depth + 1, 151 | num_samples=y2.shape[0], trigger=best_fb_trigger) 152 | 153 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 154 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 155 | 156 | if node.effect > self.max_effect: 157 | self.max_effect = node.effect 158 | if node.effect < self.min_effect: 159 | self.min_effect = node.effect 160 | 161 | return node 162 | 163 | else: 164 | if node.effect > self.max_effect: 165 | self.max_effect = node.effect 166 | if node.effect < self.min_effect: 167 | self.min_effect = node.effect 168 | 169 | self.num_leaves += 1 170 | node.leaf_num = self.num_leaves 171 | node.is_leaf = True 172 | return node 173 | -------------------------------------------------------------------------------- /CTL/causal_tree/nn_pehe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/nn_pehe/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/nn_pehe/balance_split.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.nn_pehe.tree import * 2 | 3 | 4 | class BaseNode(PEHENode): 5 | 6 | def __init__(self, **kwargs): 7 | super().__init__(**kwargs) 8 | 9 | # self.obj = obj 10 | 11 | 12 | # ---------------------------------------------------------------- 13 | # Base causal tree (ctl, base objective) 14 | # ---------------------------------------------------------------- 15 | class BalanceBasePEHE(PEHETree): 16 | 17 | def __init__(self, eval2=False, **kwargs): 18 | super().__init__(**kwargs) 19 | self.root = BaseNode() 20 | self.eval2 = eval2 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | self.root.num_samples = y.shape[0] 32 | self.num_training = y.shape[0] 33 | 34 | # ---------------------------------------------------------------- 35 | # NN_effect estimates 
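        # (note) nn_effect holds one treatment-effect proxy per training row:
        # the mean outcome of its k nearest treated neighbors minus that of its
        # k nearest control neighbors; _eval then sums the squared gap between
        # a node's pooled effect estimate and these matched estimates.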
36 | # use the overall datasets for nearest neighbor for now 37 | # ---------------------------------------------------------------- 38 | nn_effect = self.compute_nn_effect(x, y, t, k=self.k) 39 | 40 | # ---------------------------------------------------------------- 41 | # effect and pvals 42 | # ---------------------------------------------------------------- 43 | effect = tau_squared(y, t) 44 | p_val = get_pval(y, t) 45 | self.root.effect = effect 46 | self.root.p_val = p_val 47 | 48 | # ---------------------------------------------------------------- 49 | # Not sure if i should eval in root or not 50 | # ---------------------------------------------------------------- 51 | nn_pehe = self._eval(y, t, nn_effect) 52 | self.root.pehe = nn_pehe 53 | self.pehe = self.root.pehe 54 | 55 | # ---------------------------------------------------------------- 56 | # Add control/treatment means 57 | # ---------------------------------------------------------------- 58 | self.root.control_mean = np.mean(y[t == 0]) 59 | self.root.treatment_mean = np.mean(y[t == 1]) 60 | 61 | self.root.num_samples = x.shape[0] 62 | 63 | self._fit(self.root, x, y, t, nn_effect) 64 | 65 | if self.num_leaves > 0: 66 | self.pehe = self.pehe / self.num_leaves 67 | 68 | def _eval(self, train_y, train_t, nn_effect): 69 | 70 | # treated = np.where(train_t == 1)[0] 71 | # control = np.where(train_t == 0)[0] 72 | # pred_effect = np.mean(train_y[treated]) - np.mean(train_y[control]) 73 | pred_effect = ace(train_y, train_t) 74 | 75 | # nn_pehe = np.mean((nn_effect - pred_effect) ** 2) 76 | nn_pehe = np.sum((nn_effect - pred_effect) ** 2) 77 | 78 | return nn_pehe 79 | 80 | def _fit(self, node: BaseNode, train_x, train_y, train_t, nn_effect): 81 | 82 | if train_x.shape[0] == 0: 83 | return node 84 | 85 | if node.node_depth > self.tree_depth: 86 | self.tree_depth = node.node_depth 87 | 88 | if self.max_depth == self.tree_depth: 89 | self.num_leaves += 1 90 | node.leaf_num = self.num_leaves 91 | node.is_leaf = True 92 | return node 93 | 94 | # print(self.tree_depth, self.obj) 95 | 96 | best_gain = 0.0 97 | # best_gain = node.pehe # min amount 98 | best_attributes = [] 99 | best_tb_obj, best_fb_obj = (0.0, 0.0) 100 | 101 | column_count = train_x.shape[1] 102 | for col in range(0, column_count): 103 | unique_vals = np.unique(train_x[:, col]) 104 | 105 | for value in unique_vals: 106 | # check training data size 107 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 108 | = divide_set(train_x, train_y, train_t, col, value) 109 | check1 = check_min_size(self.min_size, train_t1) 110 | check2 = check_min_size(self.min_size, train_t2) 111 | if check1 or check2: 112 | continue 113 | (_, _, nn_effect1, nn_effect2, _, _) \ 114 | = divide_set(train_x, nn_effect, train_t, col, value) 115 | 116 | tb_eval = self._eval(train_y1, train_t1, nn_effect1) 117 | fb_eval = self._eval(train_y2, train_t2, nn_effect2) 118 | 119 | split_difference = np.abs(tb_eval - fb_eval) 120 | 121 | split_eval = (tb_eval + fb_eval) 122 | gain = node.pehe - split_eval - split_difference 123 | 124 | if gain > best_gain: 125 | best_gain = gain 126 | best_attributes = [col, value] 127 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 128 | # if self.eval2: 129 | # split_eval, value, tb_eval, fb_eval = self._eval2(unique_vals, train_x, train_y, train_t, nn_effect, 130 | # col, node.pehe) 131 | # 132 | # gain = node.pehe - split_eval 133 | # 134 | # if gain > best_gain: 135 | # best_gain = gain 136 | # best_attributes = [col, value] 137 | # best_tb_obj, best_fb_obj 
= (tb_eval, fb_eval) 138 | # else: 139 | # for value in unique_vals: 140 | # # check training data size 141 | # (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 142 | # = divide_set(train_x, train_y, train_t, col, value) 143 | # check1 = check_min_size(self.min_size, train_t1) 144 | # check2 = check_min_size(self.min_size, train_t2) 145 | # if check1 or check2: 146 | # continue 147 | # (_, _, nn_effect1, nn_effect2, _, _) \ 148 | # = divide_set(train_x, nn_effect, train_t, col, value) 149 | # 150 | # tb_eval = self._eval(train_y1, train_t1, nn_effect1) 151 | # fb_eval = self._eval(train_y2, train_t2, nn_effect2) 152 | # 153 | # split_eval = (tb_eval + fb_eval) 154 | # gain = node.pehe - split_eval 155 | # 156 | # if gain > best_gain: 157 | # best_gain = gain 158 | # best_attributes = [col, value] 159 | # best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 160 | 161 | if best_gain > 0: 162 | node.col = best_attributes[0] 163 | node.value = best_attributes[1] 164 | 165 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 166 | = divide_set(train_x, train_y, train_t, node.col, node.value) 167 | (_, _, nn_effect1, nn_effect2, _, _) \ 168 | = divide_set(train_x, nn_effect, train_t, node.col, node.value) 169 | 170 | y1 = train_y1 171 | y2 = train_y2 172 | t1 = train_t1 173 | t2 = train_t2 174 | 175 | best_tb_effect = ace(y1, t1) 176 | best_fb_effect = ace(y2, t2) 177 | tb_p_val = get_pval(y1, t1) 178 | fb_p_val = get_pval(y2, t2) 179 | 180 | self.pehe = self.pehe - node.pehe + best_tb_obj + best_fb_obj 181 | 182 | tb = BaseNode(obj=best_tb_obj, pehe=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 183 | node_depth=node.node_depth + 1, 184 | num_samples=y1.shape[0]) 185 | fb = BaseNode(obj=best_fb_obj, pehe=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 186 | node_depth=node.node_depth + 1, 187 | num_samples=y2.shape[0]) 188 | 189 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, nn_effect1) 190 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, nn_effect2) 191 | 192 | if node.effect > self.max_effect: 193 | self.max_effect = node.effect 194 | if node.effect < self.min_effect: 195 | self.min_effect = node.effect 196 | 197 | return node 198 | 199 | else: 200 | if node.effect > self.max_effect: 201 | self.max_effect = node.effect 202 | if node.effect < self.min_effect: 203 | self.min_effect = node.effect 204 | 205 | self.num_leaves += 1 206 | node.leaf_num = self.num_leaves 207 | node.is_leaf = True 208 | return node 209 | -------------------------------------------------------------------------------- /CTL/causal_tree/nn_pehe/honest.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.nn_pehe.tree import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class HonestNode(PEHENode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class HonestPEHE(PEHETree): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = HonestNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 
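        # (note) honest split: train_test_split below reserves half of the data
        # (est_x, est_y, est_t) purely for leaf effect estimation, so tree
        # structure is chosen on samples disjoint from those used for estimates.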
30 | 31 | # ---------------------------------------------------------------- 32 | # Split data 33 | # ---------------------------------------------------------------- 34 | x, est_x, y, est_y, t, est_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 35 | test_size=0.5) 36 | self.root.num_samples = est_y.shape[0] 37 | self.num_training = y.shape[0] 38 | 39 | # ---------------------------------------------------------------- 40 | # NN_effect estimates 41 | # use the overall datasets for nearest neighbor for now 42 | # ---------------------------------------------------------------- 43 | nn_effect = compute_nn_effect(x, y, t, k=self.k) 44 | # val_nn_effect = compute_nn_effect(est_x, est_y, est_t, k=self.k) 45 | 46 | # ---------------------------------------------------------------- 47 | # effect and pvals 48 | # ---------------------------------------------------------------- 49 | effect = tau_squared(y, t) 50 | p_val = get_pval(y, t) 51 | self.root.effect = effect 52 | self.root.p_val = p_val 53 | 54 | # ---------------------------------------------------------------- 55 | # Not sure if i should eval in root or not 56 | # ---------------------------------------------------------------- 57 | nn_pehe = self._eval(y, t, nn_effect) 58 | self.root.obj = nn_pehe 59 | self.obj = self.root.obj 60 | 61 | # ---------------------------------------------------------------- 62 | # Add control/treatment means 63 | # ---------------------------------------------------------------- 64 | self.root.control_mean = np.mean(y[t == 0]) 65 | self.root.treatment_mean = np.mean(y[t == 1]) 66 | 67 | self.root.num_samples = x.shape[0] 68 | 69 | self._fit(self.root, x, y, t, nn_effect, est_x, est_y, est_t) 70 | 71 | if self.num_leaves > 0: 72 | self.obj = self.obj / self.num_leaves 73 | 74 | def _eval(self, train_y, train_t, nn_effect): 75 | 76 | # total_train = train_y.shape[0] 77 | 78 | # treated = np.where(train_t == 1)[0] 79 | # control = np.where(train_t == 0)[0] 80 | # pred_effect = np.mean(train_y[treated]) - np.mean(train_y[control]) 81 | pred_effect = ace(train_y, train_t) 82 | 83 | # nn_pehe = np.mean((nn_effect - pred_effect) ** 2) 84 | nn_pehe = np.sum((nn_effect - pred_effect) ** 2) 85 | 86 | # val_effect = ace(val_y, val_t) 87 | # val_nn_pehe = np.sum((val_nn_effect - pred_effect) ** 2) 88 | # val_train_ratio = total_train / total_val 89 | # val_nn_pehe = val_nn_pehe * val_train_ratio 90 | # pehe_diff = np.abs(nn_pehe - val_nn_pehe) 91 | 92 | # cost = np.abs(total_train * pred_effect - total_train * val_effect) 93 | 94 | var_t, var_c = variance(train_y, train_t) 95 | 96 | return nn_pehe 97 | 98 | def _fit(self, node: HonestNode, train_x, train_y, train_t, nn_effect, est_x, est_y, est_t): 99 | 100 | if train_x.shape[0] == 0: 101 | return node 102 | 103 | if node.node_depth > self.tree_depth: 104 | self.tree_depth = node.node_depth 105 | 106 | if self.max_depth == self.tree_depth: 107 | if node.effect > self.max_effect: 108 | self.max_effect = node.effect 109 | if node.effect < self.min_effect: 110 | self.min_effect = node.effect 111 | self.num_leaves += 1 112 | node.leaf_num = self.num_leaves 113 | node.is_leaf = True 114 | return node 115 | 116 | # print(self.tree_depth, self.obj) 117 | 118 | best_gain = 0.0 119 | best_attributes = [] 120 | best_tb_obj, best_fb_obj = (0.0, 0.0) 121 | 122 | column_count = train_x.shape[1] 123 | for col in range(0, column_count): 124 | unique_vals = np.unique(train_x[:, col]) 125 | 126 | for value in unique_vals: 127 | (est_x1, est_x2, est_y1, est_y2, 
est_t1, est_t2) \ 128 | = divide_set(est_x, est_y, est_t, col, value) 129 | 130 | # check est set size 131 | if check_min_size(self.min_size, est_t1) or check_min_size(self.min_size, est_t2): 132 | continue 133 | 134 | # check training data size 135 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 136 | = divide_set(train_x, train_y, train_t, col, value) 137 | check1 = check_min_size(self.min_size, train_t1) 138 | check2 = check_min_size(self.min_size, train_t2) 139 | if check1 or check2: 140 | continue 141 | (_, _, nn_effect1, nn_effect2, _, _) \ 142 | = divide_set(train_x, nn_effect, train_t, col, value) 143 | 144 | tb_eval = self._eval(train_y1, train_t1, nn_effect1) 145 | fb_eval = self._eval(train_y2, train_t2, nn_effect2) 146 | 147 | split_eval = (tb_eval + fb_eval) 148 | gain = node.obj - split_eval 149 | 150 | if gain > best_gain: 151 | best_gain = gain 152 | best_attributes = [col, value] 153 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 154 | 155 | # print(tb_eval, fb_eval, gain, best_gain) 156 | 157 | if best_gain > 0: 158 | node.col = best_attributes[0] 159 | node.value = best_attributes[1] 160 | 161 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 162 | = divide_set(train_x, train_y, train_t, node.col, node.value) 163 | (est_x1, est_x2, est_y1, est_y2, est_t1, est_t2) \ 164 | = divide_set(est_x, est_y, est_t, node.col, node.value) 165 | (_, _, nn_effect1, nn_effect2, _, _) \ 166 | = divide_set(train_x, nn_effect, train_t, node.col, node.value) 167 | 168 | # y1 = train_y1 169 | # y2 = train_y2 170 | # t1 = train_t1 171 | # t2 = train_t2 172 | # y1 = np.concatenate((train_y1, val_y1)) 173 | # y2 = np.concatenate((train_y2, val_y2)) 174 | # t1 = np.concatenate((train_t1, val_t1)) 175 | # t2 = np.concatenate((train_t2, val_t2)) 176 | y1 = est_y1 177 | y2 = est_y2 178 | t1 = est_t1 179 | t2 = est_t2 180 | 181 | best_tb_effect = ace(y1, t1) 182 | best_fb_effect = ace(y2, t2) 183 | tb_p_val = get_pval(y1, t1) 184 | fb_p_val = get_pval(y2, t2) 185 | 186 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 187 | 188 | tb = HonestNode(obj=best_tb_obj, pehe=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 189 | node_depth=node.node_depth + 1, 190 | num_samples=train_y1.shape[0]) 191 | fb = HonestNode(obj=best_fb_obj, pehe=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 192 | node_depth=node.node_depth + 1, 193 | num_samples=train_y2.shape[0]) 194 | 195 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, nn_effect1, est_x1, est_y1, est_t1) 196 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, nn_effect2, est_x2, est_y2, est_t2) 197 | 198 | if node.effect > self.max_effect: 199 | self.max_effect = node.effect 200 | if node.effect < self.min_effect: 201 | self.min_effect = node.effect 202 | 203 | return node 204 | 205 | else: 206 | if node.effect > self.max_effect: 207 | self.max_effect = node.effect 208 | if node.effect < self.min_effect: 209 | self.min_effect = node.effect 210 | 211 | self.num_leaves += 1 212 | node.leaf_num = self.num_leaves 213 | node.is_leaf = True 214 | return node 215 | -------------------------------------------------------------------------------- /CTL/causal_tree/nn_pehe/tree.py: -------------------------------------------------------------------------------- 1 | try: 2 | from CTL.causal_tree.util_c import * 3 | except: 4 | from CTL.causal_tree.util import * 5 | from CTL.causal_tree.ct import * 6 | import numpy as np 7 | from scipy.spatial import cKDTree 8 | 9 | 10 | # TODO: Add 
weighting on evaluations
11 | # TODO: add weighting on k > 1 nearest neighbors?
12 | 
13 | def compute_nn_effect(x, y, t, k=1):
14 |     kdtree = cKDTree(x)
15 |     d, idx = kdtree.query(x, k=x.shape[0])
16 |     idx = idx[:, 1:]
17 |     treated = np.where(t == 1)[0]
18 |     control = np.where(t == 0)[0]
19 |     bool_treated = np.isin(idx, treated)
20 |     bool_control = np.isin(idx, control)
21 | 
22 |     nn_effect = np.zeros(x.shape[0])
23 |     for i in range(len(bool_treated)):
24 |         i_treat_idx = np.where(bool_treated[i, :])[0][:k]
25 |         i_control_idx = np.where(bool_control[i, :])[0][:k]
26 | 
27 |         i_treat_nn = y[idx[i, i_treat_idx]]
28 |         i_cont_nn = y[idx[i, i_control_idx]]
29 | 
30 |         nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn)
31 | 
32 |     return nn_effect
33 | 
34 | 
35 | class PEHENode(CTNode):
36 | 
37 |     def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1,
38 |                  is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0, pehe=0.0):
39 |         super().__init__()
40 |         # not tree specific features (most likely added at creation)
41 |         self.p_val = p_val
42 |         self.effect = effect
43 |         self.node_depth = node_depth
44 |         self.control_mean = control_mean
45 |         self.treatment_mean = treatment_mean
46 | 
47 |         # during tree building
48 |         self.obj = obj
49 |         self.num_samples = num_samples
50 |         self.pehe = pehe
51 | 
52 |         # after building tree
53 |         self.col = col
54 |         self.value = value
55 |         self.is_leaf = is_leaf
56 |         self.leaf_num = leaf_num
57 |         self.true_branch = None
58 |         self.false_branch = None
59 | 
60 |         # after calling functions
61 |         self.column_name = ""
62 |         self.decision = ""
63 | 
64 | 
65 | class PEHETree(CausalTree):
66 | 
67 |     def __init__(self, split_size=0.5, max_depth=-1, min_size=2, max_values=None, verbose=False,
68 |                  k=1, use_propensity=False, propensity_model=None,
69 |                  seed=724):
70 |         super().__init__()
71 |         self.val_split = split_size
72 |         self.max_depth = max_depth
73 |         self.min_size = min_size
74 |         self.seed = seed
75 | 
76 |         self.max_values = max_values
77 |         self.verbose = verbose
78 | 
79 |         self.max_effect = 0.0
80 |         self.min_effect = 0.0
81 | 
82 |         self.features = None
83 | 
84 |         self.k = k
85 |         self.num_training = 1
86 |         self.pehe = 0
87 |         self.use_propensity = use_propensity
88 |         if use_propensity:
89 |             if propensity_model is not None:
90 |                 self.propensity_model = propensity_model
91 |             else:
92 |                 from sklearn.linear_model import LogisticRegression
93 |                 self.propensity_model = LogisticRegression()
94 | 
95 |         self.root = PEHENode()
96 | 
97 |     def compute_nn_effect(self, x, y, t, k=1):
98 |         if self.use_propensity:
99 |             self.propensity_model.fit(x, t)
100 |             propensity = self.propensity_model.predict_proba(x)[:, 1:]
101 |             kdtree = cKDTree(propensity)
102 |             _, idx = kdtree.query(propensity, k=x.shape[0])
103 |         else:
104 |             kdtree = cKDTree(x)
105 |             _, idx = kdtree.query(x, k=x.shape[0])
106 |         idx = idx[:, 1:]
107 |         treated = np.where(t == 1)[0]
108 |         control = np.where(t == 0)[0]
109 |         bool_treated = np.isin(idx, treated)
110 |         bool_control = np.isin(idx, control)
111 | 
112 |         nn_effect = np.zeros(x.shape[0])
113 |         for i in range(len(bool_treated)):
114 |             i_treat_idx = np.where(bool_treated[i, :])[0][:k]
115 |             i_control_idx = np.where(bool_control[i, :])[0][:k]
116 | 
117 |             i_treat_nn = y[idx[i, i_treat_idx]]
118 |             i_cont_nn = y[idx[i, i_control_idx]]
119 | 
120 |             nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn)
121 | 
122 |         return nn_effect
123 | 
124 |     @abstractmethod
125 |     def fit(self, x, y, t):
126 |         pass
127 | 
128 |     def predict(self, x):
129 | 
130 |         def
_predict(node: PEHENode, observation): 131 | if node.is_leaf: 132 | return node.effect 133 | else: 134 | v = observation[node.col] 135 | if v >= node.value: 136 | branch = node.true_branch 137 | else: 138 | branch = node.false_branch 139 | 140 | return _predict(branch, observation) 141 | 142 | if len(x.shape) == 1: 143 | prediction = _predict(self.root, x) 144 | return prediction 145 | 146 | num_test = x.shape[0] 147 | 148 | prediction = np.zeros(num_test) 149 | 150 | for i in range(num_test): 151 | test_example = x[i, :] 152 | prediction[i] = _predict(self.root, test_example) 153 | 154 | return prediction 155 | 156 | def get_groups(self, x): 157 | 158 | def _get_group(node: PEHENode, observation): 159 | if node.is_leaf: 160 | return node.leaf_num 161 | else: 162 | v = observation[node.col] 163 | if v >= node.value: 164 | branch = node.true_branch 165 | else: 166 | branch = node.false_branch 167 | 168 | return _get_group(branch, observation) 169 | 170 | if len(x.shape) == 1: 171 | return _get_group(self.root, x) 172 | num_test = x.shape[0] 173 | leaf_results = np.zeros(num_test) 174 | 175 | for i in range(num_test): 176 | test_example = x[i, :] 177 | leaf_results[i] = _get_group(self.root, test_example) 178 | 179 | return leaf_results 180 | 181 | def get_features(self, x): 182 | 183 | def _get_features(node: PEHENode, observation, features): 184 | if node.is_leaf: 185 | return features 186 | else: 187 | v = observation[node.col] 188 | if v >= node.value: 189 | branch = node.true_branch 190 | else: 191 | branch = node.false_branch 192 | 193 | features.append(node.decision) 194 | return _get_features(branch, observation, features) 195 | 196 | if len(x.shape) == 1: 197 | features = [] 198 | return _get_features(self.root, x, features) 199 | num_test = x.shape[0] 200 | leaf_features = [] 201 | 202 | for i in range(num_test): 203 | features = [] 204 | test_example = x[i, :] 205 | leaf_features.append(_get_features(self.root, test_example, features)) 206 | 207 | return leaf_features 208 | 209 | def prune(self, alpha=0.05): 210 | 211 | def _prune(node: PEHENode): 212 | if node.true_branch is None or node.false_branch is None: 213 | return 214 | 215 | # recursive call for each branch 216 | if not node.true_branch.is_leaf: 217 | _prune(node.true_branch) 218 | if not node.false_branch.is_leaf: 219 | _prune(node.false_branch) 220 | 221 | # merge leaves (potentially) 222 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 223 | # Get branches 224 | tb = node.true_branch 225 | fb = node.false_branch 226 | 227 | tb_pval = tb.p_val 228 | fb_pval = fb.p_val 229 | 230 | if tb_pval > alpha and fb_pval > alpha: 231 | node.leaf_num = node.true_branch.leaf_num 232 | node.true_branch = None 233 | node.false_branch = None 234 | self.num_leaves = self.num_leaves - 1 235 | node.is_leaf = True 236 | 237 | # ---------------------------------------------------------------- 238 | # Something about obj/mse? if that is added 239 | # 240 | # - can do a self function so that tree references itself/it's own type of node? 
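
The recursive `_predict` helper above walks left or right on `x[col] >= value` until it reaches a leaf, which stores the estimated effect. A minimal sketch of that walk on a hand-built two-leaf stump (the tree and its effect values are made up for illustration; the node fields come from the `PEHENode` definition in this file):

```python
import numpy as np
from CTL.causal_tree.nn_pehe.tree import PEHENode

# hand-built stump: split on feature 0 at 0.5; effects are made-up numbers
root = PEHENode(col=0, value=0.5)
root.true_branch = PEHENode(effect=1.2, is_leaf=True, leaf_num=1)    # x[0] >= 0.5
root.false_branch = PEHENode(effect=-0.3, is_leaf=True, leaf_num=2)  # x[0] <  0.5

def predict_one(node, obs):
    # mirrors the inner _predict: leaves store the estimated treatment effect
    if node.is_leaf:
        return node.effect
    branch = node.true_branch if obs[node.col] >= node.value else node.false_branch
    return predict_one(branch, obs)

print(predict_one(root, np.array([0.9])))  # 1.2
print(predict_one(root, np.array([0.1])))  # -0.3
```
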
241 |                     # ----------------------------------------------------------------
242 |                     if tb.node_depth == self.tree_depth:
243 |                         self.tree_depth = self.tree_depth - 1
244 | 
245 |         _prune(self.root)
246 | 
247 |     def get_triggers(self, x):
248 |         pass
249 | 
250 |     def save(self, filename):
251 |         import pickle as pkl
252 | 
253 |         check_dir(filename)
254 |         with open(filename, "wb") as file:
255 |             pkl.dump(self, file)
256 | 
--------------------------------------------------------------------------------
/CTL/causal_tree/r_tree/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/r_tree/__init__.py
--------------------------------------------------------------------------------
/CTL/causal_tree/r_tree/tree.py:
--------------------------------------------------------------------------------
1 | try:
2 |     from CTL.causal_tree.util_c import *
3 | except:
4 |     from CTL.causal_tree.util import *
5 | from CTL.causal_tree.ct import *
6 | import numpy as np
7 | from scipy.spatial import cKDTree
8 | 
9 | 
10 | # TODO: Add weighting on evaluations
11 | # TODO: add weighting on k > 1 nearest neighbors?
12 | 
13 | def compute_nn_effect(x, y, t, k=1):
14 |     kdtree = cKDTree(x)
15 |     d, idx = kdtree.query(x, k=x.shape[0])
16 |     idx = idx[:, 1:]
17 |     treated = np.where(t == 1)[0]
18 |     control = np.where(t == 0)[0]
19 |     bool_treated = np.isin(idx, treated)
20 |     bool_control = np.isin(idx, control)
21 | 
22 |     nn_effect = np.zeros(x.shape[0])
23 |     for i in range(len(bool_treated)):
24 |         i_treat_idx = np.where(bool_treated[i, :])[0][:k]
25 |         i_control_idx = np.where(bool_control[i, :])[0][:k]
26 | 
27 |         i_treat_nn = y[idx[i, i_treat_idx]]
28 |         i_cont_nn = y[idx[i, i_control_idx]]
29 | 
30 |         nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn)
31 | 
32 |     return nn_effect
33 | 
34 | 
35 | class RNode(CTNode):
36 | 
37 |     def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1,
38 |                  is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0, pehe=0.0):
39 |         super().__init__()
40 |         # not tree specific features (most likely added at creation)
41 |         self.p_val = p_val
42 |         self.effect = effect
43 |         self.node_depth = node_depth
44 |         self.control_mean = control_mean
45 |         self.treatment_mean = treatment_mean
46 | 
47 |         # during tree building
48 |         self.obj = obj
49 |         self.num_samples = num_samples
50 |         self.pehe = pehe
51 | 
52 |         # after building tree
53 |         self.col = col
54 |         self.value = value
55 |         self.is_leaf = is_leaf
56 |         self.leaf_num = leaf_num
57 |         self.true_branch = None
58 |         self.false_branch = None
59 | 
60 |         # after calling functions
61 |         self.column_name = ""
62 |         self.decision = ""
63 | 
64 | 
65 | class RTree(CausalTree):
66 | 
67 |     def __init__(self, split_size=0.5, max_depth=-1, min_size=2, max_values=None, verbose=False,
68 |                  k=1, use_propensity=False, propensity_model=None,
69 |                  seed=724):
70 |         super().__init__()
71 |         self.val_split = split_size
72 |         self.max_depth = max_depth
73 |         self.min_size = min_size
74 |         self.seed = seed
75 | 
76 |         self.max_values = max_values
77 |         self.verbose = verbose
78 | 
79 |         self.max_effect = 0.0
80 |         self.min_effect = 0.0
81 | 
82 |         self.features = None
83 | 
84 |         self.k = k
85 |         self.num_training = 1
86 |         self.pehe = 0
87 |         self.use_propensity = use_propensity
88 |         if use_propensity:
89 |             if propensity_model is not None:
90 |                 self.propensity_model = propensity_model
91 |             else:
92 |                 from sklearn.linear_model import LogisticRegression
93 |                 self.propensity_model = LogisticRegression()
94 | 
95 |         self.root = RNode()
96 | 
97 |     def compute_nn_effect(self, x, y, t, k=1):
98 |         if self.use_propensity:
99 |             self.propensity_model.fit(x, t)
100 |             propensity = self.propensity_model.predict_proba(x)[:, 1:]
101 |             kdtree = cKDTree(propensity)
102 |             _, idx = kdtree.query(propensity, k=x.shape[0])
103 |         else:
104 |             kdtree = cKDTree(x)
105 |             _, idx = kdtree.query(x, k=x.shape[0])
106 |         idx = idx[:, 1:]
107 |         treated = np.where(t == 1)[0]
108 |         control = np.where(t == 0)[0]
109 |         bool_treated = np.isin(idx, treated)
110 |         bool_control = np.isin(idx, control)
111 | 
112 |         nn_effect = np.zeros(x.shape[0])
113 |         for i in range(len(bool_treated)):
114 |             i_treat_idx = np.where(bool_treated[i, :])[0][:k]
115 |             i_control_idx = np.where(bool_control[i, :])[0][:k]
116 | 
117 |             i_treat_nn = y[idx[i, i_treat_idx]]
118 |             i_cont_nn = y[idx[i, i_control_idx]]
119 | 
120 |             nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn)
121 | 
122 |         return nn_effect
123 | 
124 |     @abstractmethod
125 |     def fit(self, x, y, t):
126 |         pass
127 | 
128 |     def predict(self, x):
129 | 
130 |         def _predict(node: RNode, observation):
131 |             if node.is_leaf:
132 |                 return node.effect
133 |             else:
134 |                 v = observation[node.col]
135 |                 if v >= node.value:
136 |                     branch = node.true_branch
137 |                 else:
138 |                     branch = node.false_branch
139 | 
140 |             return _predict(branch, observation)
141 | 
142 |         if len(x.shape) == 1:
143 |             prediction = _predict(self.root, x)
144 |             return prediction
145 | 
146 |         num_test = x.shape[0]
147 | 
148 |         prediction = np.zeros(num_test)
149 | 
150 |         for i in range(num_test):
151 |             test_example = x[i, :]
152 |             prediction[i] = _predict(self.root, test_example)
153 | 
154 |         return prediction
155 | 
156 |     def get_groups(self, x):
157 | 
158 |         def _get_group(node: RNode, observation):
159 |             if node.is_leaf:
160 |                 return node.leaf_num
161 |             else:
162 |                 v = observation[node.col]
163 |                 if v >= node.value:
164 |                     branch = node.true_branch
165 |                 else:
166 |                     branch = node.false_branch
167 | 
168 |             return _get_group(branch, observation)
169 | 
170 |         if len(x.shape) == 1:
171 |             return _get_group(self.root, x)
172 |         num_test = x.shape[0]
173 |         leaf_results = np.zeros(num_test)
174 | 
175 |         for i in range(num_test):
176 |             test_example = x[i, :]
177 |             leaf_results[i] = _get_group(self.root, test_example)
178 | 
179 |         return leaf_results
180 | 
181 |     def get_features(self, x):
182 | 
183 |         def _get_features(node: RNode, observation, features):
184 |             if node.is_leaf:
185 |                 return features
186 |             else:
187 |                 v = observation[node.col]
188 |                 if v >= node.value:
189 |                     branch = node.true_branch
190 |                 else:
191 |                     branch = node.false_branch
192 | 
193 |                 features.append(node.decision)
194 |                 return _get_features(branch, observation, features)
195 | 
196 |         if len(x.shape) == 1:
197 |             features = []
198 |             return _get_features(self.root, x, features)
199 |         num_test = x.shape[0]
200 |         leaf_features = []
201 | 
202 |         for i in range(num_test):
203 |             features = []
204 |             test_example = x[i, :]
205 |             leaf_features.append(_get_features(self.root, test_example, features))
206 | 
207 |         return leaf_features
208 | 
209 |     def prune(self, alpha=0.05):
210 | 
211 |         def _prune(node: RNode):
212 |             if node.true_branch is None or node.false_branch is None:
213 |                 return
214 | 
215 |             # recursive call for each branch
216 |             if not node.true_branch.is_leaf:
217 |                 _prune(node.true_branch)
218 |             if not node.false_branch.is_leaf:
219 |                 _prune(node.false_branch)
220 | 
221 |             # merge leaves (potentially)
222 |             if node.true_branch.is_leaf and node.false_branch.is_leaf:
223 |                 # Get branches
224 |                 tb = node.true_branch
225 |                 fb = node.false_branch
226 | 
227 |                 tb_pval = tb.p_val
228 |                 fb_pval = fb.p_val
229 | 
230 |                 if tb_pval > alpha and fb_pval > alpha:
231 |                     node.leaf_num = node.true_branch.leaf_num
232 |                     node.true_branch = None
233 |                     node.false_branch = None
234 |                     self.num_leaves = self.num_leaves - 1
235 |                     node.is_leaf = True
236 | 
237 |                     # ----------------------------------------------------------------
238 |                     # Something about obj/mse? if that is added
239 |                     #
240 |                     # - can do a self function so that tree references itself/it's own type of node?
241 |                     # ----------------------------------------------------------------
242 |                     if tb.node_depth == self.tree_depth:
243 |                         self.tree_depth = self.tree_depth - 1
244 | 
245 |         _prune(self.root)
246 | 
247 |     def get_triggers(self, x):
248 |         pass
249 | 
250 |     def save(self, filename):
251 |         import pickle as pkl
252 | 
253 |         check_dir(filename)
254 |         with open(filename, "wb") as file:
255 |             pkl.dump(self, file)
256 | 
--------------------------------------------------------------------------------
/CTL/causal_tree/sig_diff/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/sig_diff/__init__.py
--------------------------------------------------------------------------------
/CTL/causal_tree/sig_diff/sig.py:
--------------------------------------------------------------------------------
1 | # from CTL.causal_tree.util import *
2 | try:
3 |     from CTL.causal_tree.util_c import *
4 | except:
5 |     from CTL.causal_tree.util import *
6 | from CTL.causal_tree.ct import *
7 | import numpy as np
8 | from scipy.stats import ttest_ind_from_stats
9 | 
10 | 
11 | class SigNode(CTNode):
12 | 
13 |     def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1,
14 |                  is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0):
15 |         super().__init__()
16 |         # not tree specific features (most likely added at creation)
17 |         self.p_val = p_val
18 |         self.effect = effect
19 |         self.node_depth = node_depth
20 |         self.control_mean = control_mean
21 |         self.treatment_mean = treatment_mean
22 | 
23 |         # during tree building
24 |         self.obj = obj
25 |         self.num_samples = num_samples
26 | 
27 |         # after building tree
28 |         self.col = col
29 |         self.value = value
30 |         self.is_leaf = is_leaf
31 |         self.leaf_num = leaf_num
32 |         self.true_branch = None
33 |         self.false_branch = None
34 | 
35 |         # after calling functions
36 |         self.column_name = ""
37 |         self.decision = ""
38 | 
39 | 
40 | class SigTree(CausalTree):
41 | 
42 |     def __init__(self, alpha=0.05, max_depth=-1, min_size=2, seed=724, max_values=None, verbose=False):
43 |         super().__init__()
44 |         self.alpha = alpha
45 |         self.max_depth = max_depth
46 |         self.min_size = min_size
47 |         self.seed = seed
48 | 
49 |         self.max_values = max_values
50 |         self.verbose = verbose
51 | 
52 |         self.max_effect = 0.0
53 |         self.min_effect = 0.0
54 | 
55 |         self.features = None
56 | 
57 |         self.root = SigNode()
58 | 
59 |     @abstractmethod
60 |     def fit(self, x, y, t):
61 |         pass
62 | 
63 |     def _eval_util(self, train_y, train_t):
64 |         var_t, var_c = variance(train_y, train_t)
65 |         std = np.sqrt(var_t) + np.sqrt(var_c)
66 |         effect = ace(train_y, train_t)
67 | 
68 |         return effect, std
69 | 
70 |     def _eval(self, y_train1, t_train1, y_train2, t_train2):
71 | 
72 |         total1 = y_train1.shape[0]
73 |         total2 = y_train2.shape[0]
74 | 
75 | return_val = (1, 1) 76 | if total1 < 1 or total2 < 1: 77 | return return_val 78 | 79 | effect1, std1 = self._eval_util(y_train1, t_train1) 80 | effect2, std2 = self._eval_util(y_train2, t_train2) 81 | 82 | stat, p_val = ttest_ind_from_stats(effect1, std1, total1, effect2, std2, total2) 83 | return stat, p_val 84 | 85 | def predict(self, x): 86 | 87 | def _predict(node: SigNode, observation): 88 | if node.is_leaf: 89 | return node.effect 90 | else: 91 | v = observation[node.col] 92 | if v >= node.value: 93 | branch = node.true_branch 94 | else: 95 | branch = node.false_branch 96 | 97 | return _predict(branch, observation) 98 | 99 | if len(x.shape) == 1: 100 | prediction = _predict(self.root, x) 101 | return prediction 102 | 103 | num_test = x.shape[0] 104 | 105 | prediction = np.zeros(num_test) 106 | 107 | for i in range(num_test): 108 | test_example = x[i, :] 109 | prediction[i] = _predict(self.root, test_example) 110 | 111 | return prediction 112 | 113 | def get_groups(self, x): 114 | 115 | def _get_group(node: SigNode, observation): 116 | if node.is_leaf: 117 | return node.leaf_num 118 | else: 119 | v = observation[node.col] 120 | if v >= node.value: 121 | branch = node.true_branch 122 | else: 123 | branch = node.false_branch 124 | 125 | return _get_group(branch, observation) 126 | 127 | if len(x.shape) == 1: 128 | return _get_group(self.root, x) 129 | num_test = x.shape[0] 130 | leaf_results = np.zeros(num_test) 131 | 132 | for i in range(num_test): 133 | test_example = x[i, :] 134 | leaf_results[i] = _get_group(self.root, test_example) 135 | 136 | return leaf_results 137 | 138 | def get_features(self, x): 139 | 140 | def _get_features(node: SigNode, observation, features): 141 | if node.is_leaf: 142 | return features 143 | else: 144 | v = observation[node.col] 145 | if v >= node.value: 146 | branch = node.true_branch 147 | else: 148 | branch = node.false_branch 149 | 150 | features.append(node.decision) 151 | return _get_features(branch, observation, features) 152 | 153 | if len(x.shape) == 1: 154 | features = [] 155 | return _get_features(self.root, x, features) 156 | num_test = x.shape[0] 157 | leaf_features = [] 158 | 159 | for i in range(num_test): 160 | features = [] 161 | test_example = x[i, :] 162 | leaf_features.append(_get_features(self.root, test_example, features)) 163 | 164 | return leaf_features 165 | 166 | def prune(self, alpha=0.05): 167 | 168 | def _prune(node: SigNode): 169 | if node.true_branch is None or node.false_branch is None: 170 | return 171 | 172 | # recursive call for each branch 173 | if not node.true_branch.is_leaf: 174 | _prune(node.true_branch) 175 | if not node.false_branch.is_leaf: 176 | _prune(node.false_branch) 177 | 178 | # merge leaves (potentially) 179 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 180 | # Get branches 181 | tb = node.true_branch 182 | fb = node.false_branch 183 | 184 | tb_pval = tb.p_val 185 | fb_pval = fb.p_val 186 | 187 | if tb_pval > alpha and fb_pval > alpha: 188 | node.leaf_num = node.true_branch.leaf_num 189 | node.true_branch = None 190 | node.false_branch = None 191 | self.num_leaves = self.num_leaves - 1 192 | node.is_leaf = True 193 | 194 | # ---------------------------------------------------------------- 195 | # Something about obj/mse? if that is added 196 | # 197 | # - can do a self function so that tree references itself/it's own type of node? 
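
The `_eval`/`_eval_util` pair above turns each candidate split into a two-sample t-test on the children's effect estimates via `ttest_ind_from_stats`. A hedged, self-contained sketch of that criterion on synthetic data (the data-generating process is made up; treatment is independent of outcome here, so the split should be rejected):

```python
import numpy as np
from scipy.stats import ttest_ind_from_stats

rng = np.random.default_rng(0)
y1, t1 = rng.normal(2.0, 1.0, 200), rng.integers(0, 2, 200)
y2, t2 = rng.normal(0.0, 1.0, 200), rng.integers(0, 2, 200)

def effect_and_std(y, t):
    # same quantities as _eval_util: ATE and the summed per-arm stds
    yt, yc = y[t == 1], y[t == 0]
    return yt.mean() - yc.mean(), yt.std() + yc.std()

e1, s1 = effect_and_std(y1, t1)
e2, s2 = effect_and_std(y2, t2)
stat, p = ttest_ind_from_stats(e1, s1, len(y1), e2, s2, len(y2))
print(p <= 0.05)  # False here: both child effects are ~0, no significant difference
```
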
198 | # ---------------------------------------------------------------- 199 | if tb.node_depth == self.tree_depth: 200 | self.tree_depth = self.tree_depth - 1 201 | 202 | _prune(self.root) 203 | 204 | def get_triggers(self, x): 205 | pass 206 | 207 | def save(self, filename): 208 | import pickle as pkl 209 | 210 | check_dir(filename) 211 | with open(filename, "wb") as file: 212 | pkl.dump(self, file) -------------------------------------------------------------------------------- /CTL/causal_tree/sig_diff/sig_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.sig_diff.sig import * 2 | 3 | 4 | class BaseCausalTreeLearnNode(SigNode): 5 | 6 | def __init__(self, **kwargs): 7 | super().__init__(**kwargs) 8 | 9 | 10 | class SigTreeBase(SigTree): 11 | 12 | def __init__(self, **kwargs): 13 | super().__init__(**kwargs) 14 | self.root = BaseCausalTreeLearnNode() 15 | 16 | def fit(self, x, y, t): 17 | if x.shape[0] == 0: 18 | return 0 19 | 20 | # ---------------------------------------------------------------- 21 | # Seed 22 | # ---------------------------------------------------------------- 23 | np.random.seed(self.seed) 24 | 25 | train_x, train_y, train_t = x, y, t 26 | self.root.num_samples = train_y.shape[0] 27 | # ---------------------------------------------------------------- 28 | # effect and pvals 29 | # ---------------------------------------------------------------- 30 | effect = tau_squared(y, t) 31 | p_val = get_pval(y, t) 32 | self.root.effect = effect 33 | self.root.p_val = p_val 34 | 35 | self.root.obj = 0 36 | 37 | # ---------------------------------------------------------------- 38 | # Add control/treatment means 39 | # ---------------------------------------------------------------- 40 | self.root.control_mean = np.mean(y[t == 0]) 41 | self.root.treatment_mean = np.mean(y[t == 1]) 42 | 43 | self.root.num_samples = x.shape[0] 44 | 45 | self._fit(self.root, train_x, train_y, train_t) 46 | 47 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t): 48 | 49 | if train_x.shape[0] == 0: 50 | return node 51 | 52 | if node.node_depth > self.tree_depth: 53 | self.tree_depth = node.node_depth 54 | 55 | if self.max_depth == self.tree_depth: 56 | if node.effect > self.max_effect: 57 | self.max_effect = node.effect 58 | if node.effect < self.min_effect: 59 | self.min_effect = node.effect 60 | self.num_leaves += 1 61 | node.leaf_num = self.num_leaves 62 | node.is_leaf = True 63 | return node 64 | 65 | best_gain = 1.0 66 | best_attributes = [] 67 | best_tb_obj, best_fb_obj = (0.0, 0.0) 68 | 69 | column_count = train_x.shape[1] 70 | for col in range(0, column_count): 71 | unique_vals = np.unique(train_x[:, col]) 72 | 73 | if self.max_values is not None: 74 | if self.max_values < 1: 75 | idx = np.round(np.linspace( 76 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 77 | unique_vals = unique_vals[idx] 78 | else: 79 | idx = np.round(np.linspace( 80 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 81 | unique_vals = unique_vals[idx] 82 | 83 | for value in unique_vals: 84 | 85 | # check training data size 86 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 87 | = divide_set(train_x, train_y, train_t, col, value) 88 | check1 = check_min_size(self.min_size, train_t1) 89 | check2 = check_min_size(self.min_size, train_t2) 90 | if check1 or check2: 91 | continue 92 | 93 | t_stat, diff_pval = self._eval(train_y1, train_t1, train_y2, train_t2) 94 | 95 | gain = diff_pval 96 
| 97 | if gain < best_gain and gain <= self.alpha: 98 | best_gain = gain 99 | best_attributes = [col, value] 100 | 101 | if best_gain <= self.alpha: 102 | node.col = best_attributes[0] 103 | node.value = best_attributes[1] 104 | 105 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 106 | = divide_set(train_x, train_y, train_t, node.col, node.value) 107 | 108 | y1 = train_y1 109 | y2 = train_y2 110 | t1 = train_t1 111 | t2 = train_t2 112 | 113 | best_tb_effect = ace(y1, t1) 114 | best_fb_effect = ace(y2, t2) 115 | tb_p_val = get_pval(y1, t1) 116 | fb_p_val = get_pval(y2, t2) 117 | 118 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 119 | 120 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 121 | node_depth=node.node_depth + 1, 122 | num_samples=y1.shape[0]) 123 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 124 | node_depth=node.node_depth + 1, 125 | num_samples=y2.shape[0]) 126 | 127 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 128 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 129 | 130 | if node.effect > self.max_effect: 131 | self.max_effect = node.effect 132 | if node.effect < self.min_effect: 133 | self.min_effect = node.effect 134 | 135 | return node 136 | 137 | else: 138 | if node.effect > self.max_effect: 139 | self.max_effect = node.effect 140 | if node.effect < self.min_effect: 141 | self.min_effect = node.effect 142 | 143 | self.num_leaves += 1 144 | node.leaf_num = self.num_leaves 145 | node.is_leaf = True 146 | return node 147 | -------------------------------------------------------------------------------- /CTL/causal_tree/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import numpy as np 4 | from scipy.stats import ttest_ind 5 | import subprocess 6 | import time 7 | 8 | 9 | def check_dir(path): 10 | if not os.path.exists(os.path.dirname(path)): 11 | try: 12 | os.makedirs(os.path.dirname(path)) 13 | except OSError as exc: 14 | if exc.errno != errno.EEXIST: 15 | raise 16 | 17 | 18 | def divide_set(x, y, t, col, value): 19 | idx1 = x[:, col] >= value 20 | idx2 = ~idx1 21 | 22 | x1 = x[idx1] 23 | x2 = x[idx2] 24 | 25 | y1 = y[idx1] 26 | y2 = y[idx2] 27 | 28 | t1 = t[idx1] 29 | t2 = t[idx2] 30 | 31 | return x1, x2, y1, y2, t1, t2 32 | 33 | 34 | def tau_squared(y, t): 35 | total = y.shape[0] 36 | 37 | return_val = (-np.inf, -np.inf) 38 | 39 | if total == 0: 40 | return return_val 41 | 42 | treat_vect = t 43 | 44 | effect = ace(y, treat_vect) 45 | err = (effect ** 2) * total 46 | 47 | return effect 48 | 49 | 50 | def tau_squared_trigger(outcome, treatment, min_size=1, quartile=False): 51 | """Continuous case""" 52 | total = outcome.shape[0] 53 | 54 | return_val = (-np.inf, -np.inf) 55 | 56 | if total == 0: 57 | return return_val 58 | 59 | unique_treatment = np.unique(treatment) 60 | 61 | if unique_treatment.shape[0] == 1: 62 | return return_val 63 | 64 | unique_treatment = (unique_treatment[1:] + unique_treatment[:-1]) / 2 65 | unique_treatment = unique_treatment[1:-1] 66 | 67 | if quartile: 68 | first_quartile = int(np.floor(unique_treatment.shape[0] / 4)) 69 | third_quartile = int(np.ceil(3 * unique_treatment.shape[0] / 4)) 70 | 71 | unique_treatment = unique_treatment[first_quartile:third_quartile] 72 | 73 | yy = np.tile(outcome, (unique_treatment.shape[0], 1)) 74 | tt = np.tile(treatment, (unique_treatment.shape[0], 1)) 75 | 76 | x = 
np.transpose(np.transpose(tt) > unique_treatment) 77 | 78 | tt[x] = 1 79 | tt[np.logical_not(x)] = 0 80 | 81 | treat_num = np.sum(tt == 1, axis=1) 82 | cont_num = np.sum(tt == 0, axis=1) 83 | min_size_idx = np.where(np.logical_and( 84 | treat_num >= min_size, cont_num >= min_size)) 85 | 86 | unique_treatment = unique_treatment[min_size_idx] 87 | tt = tt[min_size_idx] 88 | yy = yy[min_size_idx] 89 | 90 | if tt.shape[0] == 0: 91 | return return_val 92 | 93 | y_t_m = np.sum((yy * (tt == 1)), axis=1) / np.sum(tt == 1, axis=1) 94 | y_c_m = np.sum((yy * (tt == 0)), axis=1) / np.sum(tt == 0, axis=1) 95 | 96 | effect = y_t_m - y_c_m 97 | err = effect ** 2 98 | 99 | max_err = np.argmax(err) 100 | 101 | best_effect = effect[max_err] 102 | best_err = err[max_err] 103 | best_split = unique_treatment[max_err] 104 | 105 | best_err = total * best_err 106 | 107 | return best_effect, best_split 108 | 109 | 110 | def ace(y, t): 111 | treat = t >= 0.5 112 | # control = t == 0 113 | control = ~treat 114 | 115 | yt = y[treat] 116 | yc = y[control] 117 | 118 | mu1 = 0.0 119 | mu0 = 0.0 120 | if yt.shape[0] != 0: 121 | mu1 = np.mean(yt) 122 | if yc.shape[0] != 0: 123 | mu0 = np.mean(yc) 124 | 125 | return mu1 - mu0 126 | 127 | 128 | def ace_trigger(y, t, trigger): 129 | treat = t >= trigger 130 | control = ~treat 131 | 132 | yt = y[treat] 133 | yc = y[control] 134 | 135 | mu1 = 0.0 136 | mu0 = 0.0 137 | if yt.shape[0] != 0: 138 | mu1 = np.mean(yt) 139 | if yc.shape[0] != 0: 140 | mu0 = np.mean(yc) 141 | 142 | return mu1 - mu0 143 | 144 | 145 | def get_pval(y, t): 146 | treat = t == 1 147 | # control = t == 0 148 | control = ~treat 149 | 150 | outcome_cont = y[treat] 151 | outcome_trt = y[control] 152 | 153 | p_val = ttest_ind(outcome_cont, outcome_trt)[1] 154 | 155 | if np.isnan(p_val): 156 | return 0.000 157 | 158 | return p_val 159 | 160 | 161 | def get_pval_trigger(y, t, trigger): 162 | treat = t >= trigger 163 | control = ~treat 164 | 165 | outcome_cont = y[treat] 166 | outcome_trt = y[control] 167 | 168 | p_val = ttest_ind(outcome_cont, outcome_trt)[1] 169 | 170 | if np.isnan(p_val): 171 | return 0.000 172 | 173 | return p_val 174 | 175 | 176 | def min_size_value_bool(min_size, t, trigger=0.5): 177 | nt, nc = get_treat_size(t, trigger=trigger) 178 | 179 | return nt, nc, nt < min_size or nc < min_size 180 | 181 | 182 | def check_min_size(min_size, t, trigger=0.5): 183 | nt, nc = get_treat_size(t, trigger) 184 | 185 | return nt < min_size or nc < min_size 186 | 187 | 188 | def get_treat_size(t, trigger=0.5): 189 | treated = t >= trigger 190 | control = ~treated 191 | num_treatment = t[treated].shape[0] 192 | num_control = t[control].shape[0] 193 | 194 | return num_treatment, num_control 195 | 196 | 197 | def variance(y, t): 198 | treat_vect = t 199 | 200 | treat = treat_vect == 1 201 | # control = treat_vect == 0 202 | control = ~treat 203 | 204 | if y.shape[0] == 0: 205 | return np.array([np.inf, np.inf]) 206 | 207 | yt = y[treat] 208 | yc = y[control] 209 | 210 | if yt.shape[0] == 0: 211 | var_t = np.var(y) 212 | else: 213 | var_t = np.var(yt) 214 | 215 | if yc.shape[0] == 0: 216 | var_c = np.var(y) 217 | else: 218 | var_c = np.var(yc) 219 | 220 | return var_t, var_c 221 | 222 | 223 | def variance_trigger(y, t, trigger): 224 | treat_vect = t 225 | 226 | treat = treat_vect >= trigger 227 | # control = treat_vect == 0 228 | control = ~treat 229 | 230 | if y.shape[0] == 0: 231 | return np.array([np.inf, np.inf]) 232 | 233 | yt = y[treat] 234 | yc = y[control] 235 | 236 | if yt.shape[0] == 0: 237 | var_t = 
np.var(y) 238 | else: 239 | var_t = np.var(yt) 240 | 241 | if yc.shape[0] == 0: 242 | var_c = np.var(y) 243 | else: 244 | var_c = np.var(yc) 245 | 246 | return var_t, var_c 247 | 248 | 249 | def col_dict(names): 250 | feat_names = {} 251 | for i, name in enumerate(names): 252 | column = "Column %s" % i 253 | feat_names[column] = name 254 | return feat_names 255 | -------------------------------------------------------------------------------- /CTL/causal_tree/util_c.cpython-37m-darwin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/util_c.cpython-37m-darwin.so -------------------------------------------------------------------------------- /CTL/pehe_tree.py: -------------------------------------------------------------------------------- 1 | from CTL._tree import _CausalTree 2 | from CTL.causal_tree.nn_pehe.base import * 3 | from CTL.causal_tree.nn_pehe.val import * 4 | from CTL.causal_tree.nn_pehe.honest import * 5 | from CTL.causal_tree.nn_pehe.balance_split import * 6 | 7 | 8 | class PEHETree(_CausalTree): 9 | 10 | def __init__(self, min_size=2, max_depth=-1, k=1, 11 | val=False, split_size=0.5, 12 | honest=False, 13 | use_propensity=False, propensity_model=None, 14 | balance=False, 15 | seed=724): 16 | super().__init__() 17 | 18 | params = { 19 | "min_size": min_size, 20 | "max_depth": max_depth, 21 | "k": k, 22 | "seed": seed, 23 | "split_size": split_size, 24 | "use_propensity": use_propensity, 25 | "propensity_model": propensity_model 26 | } 27 | if val: 28 | self.tree = ValPEHE(**params) 29 | elif honest: 30 | self.tree = HonestPEHE(**params) 31 | elif balance: 32 | self.tree = BalanceBasePEHE(**params) 33 | else: 34 | self.tree = BasePEHE(**params) 35 | 36 | self.column_num = 0 37 | self.fitted = False 38 | self.tree_depth = 0 39 | 40 | self.obj = 0 41 | self.pehe = 0 42 | 43 | def fit(self, x, y, t): 44 | self.column_num = x.shape[1] 45 | x = x.astype(np.float64) 46 | y = y.astype(np.float64) 47 | t = t.astype(np.float64) 48 | self.tree.fit(x, y, t) 49 | self.fitted = True 50 | self.tree_depth = self.tree.tree_depth 51 | self.obj = self.tree.obj 52 | self.pehe = self.tree.pehe 53 | -------------------------------------------------------------------------------- /CTL/sig_diff_tree.py: -------------------------------------------------------------------------------- 1 | from CTL._tree import _CausalTree 2 | from CTL.causal_tree.sig_diff.sig_base import SigTreeBase 3 | from CTL.causal_tree.sig_diff.sig_val import SigTreeVal 4 | import numpy as np 5 | 6 | 7 | class SigDiffTree(_CausalTree): 8 | 9 | def __init__(self, alpha=0.05, min_size=2, max_depth=-1, val=False, split_size=0.5, seed=724): 10 | super().__init__() 11 | 12 | params = { 13 | "alpha": alpha, 14 | "min_size": min_size, 15 | "max_depth": max_depth, 16 | "seed": seed, 17 | } 18 | if val: 19 | params["split_size"] = split_size 20 | self.tree = SigTreeVal(**params) 21 | else: 22 | self.tree = SigTreeBase(**params) 23 | 24 | self.column_num = 0 25 | self.fitted = False 26 | self.tree_depth = 0 27 | 28 | self.obj = 0 29 | 30 | def fit(self, x, y, t): 31 | self.column_num = x.shape[1] 32 | x = x.astype(np.float64) 33 | y = y.astype(np.float64) 34 | t = t.astype(np.float64) 35 | self.tree.fit(x, y, t) 36 | self.fitted = True 37 | self.tree_depth = self.tree.tree_depth 38 | self.obj = self.tree.obj 39 | -------------------------------------------------------------------------------- 
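
An end-to-end sketch of the significance-based tree on synthetic data (the data-generating process below is invented for illustration; the class and its `fit`/`predict` are as defined in `CTL/causal_tree/sig_diff/sig_base.py` above):

```python
import numpy as np
from CTL.causal_tree.sig_diff.sig_base import SigTreeBase

rng = np.random.default_rng(724)
x = rng.normal(size=(500, 4))
t = rng.integers(0, 2, size=500).astype(float)
# treatment helps only where feature 0 is positive (made-up effect)
y = x[:, 0] + t * (x[:, 0] > 0) + rng.normal(scale=0.1, size=500)

tree = SigTreeBase(alpha=0.05, min_size=5, max_depth=3)
tree.fit(x, y, t)
print(tree.tree_depth, tree.num_leaves)
print(tree.predict(x[:5]))  # per-row effect estimates from the fitted leaves
```
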
/CTL/tree.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class Node(ABC): 5 | 6 | def __init__(self): 7 | self.is_leaf = False 8 | 9 | 10 | class Tree(ABC): 11 | 12 | def __init__(self): 13 | pass 14 | 15 | @abstractmethod 16 | def fit(self, x, y, t): 17 | pass 18 | 19 | @abstractmethod 20 | def predict(self, x): 21 | pass 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CTL 2 | 3 | Christopher Tran, Elena Zheleva, ["Learning Triggers for Heterogeneous Treatment Effects", AAAI 2019.](https://arxiv.org/pdf/1902.00087.pdf) 4 | 5 | Our method is based on and adapted from: https://github.com/susanathey/causalTree 6 | 7 | 8 | ## Requirements 9 | * Python 3 10 | * sklearn 11 | * scipy 12 | * graphviz (if you want to plot the tree) 13 | 14 | ## Installation 15 | 16 | through pip 17 | 18 | ```bash 19 | pip install causal_tree_learn 20 | ``` 21 | 22 | or clone the repository 23 | ```bash 24 | python setup.py build_ext --inplace 25 | ``` 26 | 27 | ## Demo Code 28 | 29 | Two demo codes are available to run. 
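
For instance, a minimal usage sketch on made-up data (the calls mirror `binary_example.py` below):

```python
import numpy as np
from CTL.causal_tree_learn import CausalTree

x = np.random.normal(size=(100, 3))       # covariates
t = np.random.binomial(1, 0.5, size=100)  # binary treatment
y = np.random.normal(size=100)            # outcome

ctl = CausalTree()
ctl.fit(x, y, t)
ctl.prune()
effects = ctl.predict(x)                  # estimated treatment effect per row
```
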
30 | 31 | ```bash 32 | python binary_example.py 33 | ``` 34 | Runs the tree on a binary example (asthma.txt) 35 | 36 | ```bash 37 | python trigger_example.py 38 | ``` 39 | Runs a tree on a trigger problem where the treatment is continuous (note for now the example is made up and treatment does not affect outcome, this is only to show example code) 40 | -------------------------------------------------------------------------------- /binary_example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from CTL.causal_tree_learn import CausalTree 3 | from sklearn.model_selection import train_test_split 4 | import numpy as np 5 | 6 | asthma = pd.read_csv('data/asthma.txt', delimiter=' ', index_col=None) 7 | 8 | asthma.columns = ['physician', 'age', 'sex', 'education', 'insurance', 'drug coverage', 'severity', 9 | 'comorbidity', 'physical comorbidity', 'mental comorbidity', 'satisfaction'] 10 | 11 | y = asthma['satisfaction'].values 12 | treatment = asthma['physician'].values 13 | 14 | x = asthma.drop(['satisfaction', 'physician'], axis=1).values 15 | 16 | columns = asthma.drop(['satisfaction', 'physician'], axis=1).columns 17 | 18 | y[y == 0] = -1 19 | 20 | treatment[treatment == 1] = 0 21 | treatment[treatment == 2] = 1 22 | 23 | np.random.seed(0) 24 | 25 | 26 | x_train, x_test, y_train, y_test, treat_train, treat_test = train_test_split(x, y, treatment, 27 | test_size=0.5, random_state=42) 28 | 29 | # regular CTL 30 | ctl = CausalTree(magnitude=False) 31 | ctl.fit(x_train, y_train, treat_train) 32 | ctl.prune() 33 | ctl_predict = ctl.predict(x_test) 34 | 35 | # honest CTL (CT-HL) 36 | cthl = CausalTree(honest=True) 37 | cthl.fit(x_train, y_train, treat_train) 38 | cthl.prune() 39 | cthl_predict = cthl.predict(x_test) 40 | 41 | # val honest CTL (CT-HV) 42 | cthv = CausalTree(val_honest=True) 43 | cthv.fit(x_train, y_train, treat_train) 44 | cthv.prune() 45 | cthv_predict = cthv.predict(x_test) 46 | 47 | # adaptive CT (Athey and Imbens, PNAS 2016) 48 | ct_adaptive = CausalTree(weight=0.0, split_size=0.0) 49 | ct_adaptive.fit(x_train, y_train, treat_train) 50 | ct_adaptive.prune() 51 | ct_adaptive_predict = cthv.predict(x_test) 52 | 53 | # honest CT (Athey and Imbens, PNAS 2016) 54 | ct_honest = CausalTree(honest=True, weight=0.0, split_size=0.0) 55 | ct_honest.fit(x_train, y_train, treat_train) 56 | ct_honest.prune() 57 | ct_honest_predict = ct_honest.predict(x_test) 58 | 59 | ct_adaptive.plot_tree(features=columns, filename="output/bin_tree_adaptive", show_effect=True) 60 | ct_honest.plot_tree(features=columns, filename="output/bin_tree_honest", show_effect=True) 61 | ctl.plot_tree(features=columns, filename="output/bin_tree", show_effect=True) 62 | cthl.plot_tree(features=columns, filename="output/bin_tree_honest_learn", show_effect=True) 63 | cthv.plot_tree(features=columns, filename="output/bin_tree_honest_validation", show_effect=True) -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_learn_forest.py: -------------------------------------------------------------------------------- 1 | from 
CTL.causal_tree_learn import CausalTree 2 | import numpy as np 3 | 4 | 5 | class CausalTreeLearnForest: 6 | 7 | def __init__(self, num_trees=10, bootstrap=True, max_samples=None, max_features="auto", max_depth=-1, 8 | val_honest=False, honest=False, min_size=2, split_size=0.5, weight=0.5, feature_batch_size=None, 9 | seed=724): 10 | 11 | tree_params = { 12 | "weight": weight, 13 | "split_size": split_size, 14 | "max_depth": max_depth, 15 | "seed": seed, 16 | "min_size": min_size, 17 | "val_honest": val_honest, 18 | "honest": honest, 19 | "feature_batch_size": feature_batch_size, 20 | } 21 | 22 | self.num_trees = num_trees 23 | self.bootstrap = bootstrap 24 | self.max_samples = max_samples 25 | self.max_features = max_features 26 | self.max_depth = max_depth 27 | 28 | self.trees = tuple(CausalTree(**tree_params) for i in range(num_trees)) 29 | 30 | def fit(self, x, y, t): 31 | x = x.astype(float) 32 | y = y.astype(float) 33 | t = t.astype(float) 34 | 35 | for tree in self.trees: 36 | example_samples, feature_samples = self._sample(x) 37 | 38 | sample_x = x[np.ix_(example_samples, feature_samples)] 39 | sample_y = y[example_samples] 40 | sample_t = t[example_samples] 41 | 42 | tree.fit(sample_x, sample_y, sample_t) 43 | 44 | def predict(self, x): 45 | predictions = np.zeros((self.num_trees, x.shape[0])) 46 | for i, tree in enumerate(self.trees): 47 | predictions[i] = tree.predict(x) 48 | 49 | return np.mean(predictions, axis=0) 50 | 51 | def _sample(self, x): 52 | total_examples = x.shape[0] 53 | total_features = x.shape[1] 54 | 55 | example_samples = self._sample_examples(total_examples) 56 | feature_samples = self._feature_sample(total_features) 57 | 58 | return example_samples, feature_samples 59 | 60 | def _sample_examples(self, total_examples): 61 | if self.bootstrap: 62 | if self.max_samples: 63 | if isinstance(self.max_samples, float): 64 | example_samples = np.random.choice(np.arange(0, total_examples), 65 | size=int(self.max_samples * total_examples)) 66 | elif isinstance(self.max_samples, int): 67 | example_samples = np.random.choice(np.arange(0, total_examples), size=self.max_samples) 68 | else: 69 | example_samples = np.random.choice(np.arange(0, total_examples), size=total_examples) 70 | else: 71 | example_samples = np.random.choice(np.arange(0, total_examples), size=total_examples) 72 | else: 73 | example_samples = np.arange(0, total_examples) 74 | 75 | return example_samples 76 | 77 | def _feature_sample(self, total_features): 78 | num_features = self._feature_sample_size(total_features) 79 | feature_samples = np.random.permutation(total_features)[:num_features] 80 | return feature_samples 81 | 82 | def _feature_sample_size(self, total_features): 83 | num_features = total_features 84 | if self.max_features == "auto" or self.max_features == "sqrt": 85 | num_features = int(np.sqrt(num_features)) 86 | elif isinstance(self.max_features, int): 87 | num_features = self.max_features 88 | elif isinstance(self.max_features, float): 89 | num_features = int(self.max_features * total_features) 90 | return num_features 91 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/__init__.py -------------------------------------------------------------------------------- 
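
The forest draws a bootstrap sample of rows and a random subset of columns for every tree (`_sample_examples` / `_feature_sample` above). A small standalone sketch of that sampling scheme with made-up sizes:

```python
import numpy as np

rng = np.random.default_rng(724)
n, d = 100, 16
x = rng.normal(size=(n, d))

rows = rng.choice(n, size=n, replace=True)   # bootstrap: n rows with replacement
cols = rng.permutation(d)[:int(np.sqrt(d))]  # "auto"/"sqrt": keep sqrt(d) features
sub_x = x[np.ix_(rows, cols)]                # same indexing the forest's fit uses
print(sub_x.shape)                           # (100, 4)
```
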
/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ct.py: -------------------------------------------------------------------------------- 1 | from CTL.tree import * 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class CTNode(ABC): 6 | 7 | def __init__(self): 8 | super().__init__() 9 | 10 | 11 | class CausalTree(ABC): 12 | 13 | def __init__(self): 14 | super().__init__() 15 | 16 | # the learning objective 17 | self.obj = 0.0 18 | # Haven't implemented "mse" yet 19 | self.mse = 0.0 20 | 21 | # tree properties 22 | self.tree_depth = 0 23 | self.num_leaves = 0 24 | 25 | @abstractmethod 26 | def fit(self, x, y, t): 27 | pass 28 | 29 | @abstractmethod 30 | def predict(self, x): 31 | pass 32 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl/adaptive.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class AdaptiveNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class AdaptiveTree(CTLearn): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = AdaptiveNode() 21 | 22 | def adaptive_eval(self, train_y, train_t): 23 | total_train = train_y.shape[0] 24 | 25 | train_effect = ace(train_y, train_t) 26 | 27 | train_mse = total_train * (train_effect ** 2) 28 | 29 | obj = train_mse 30 | mse = total_train * (train_effect ** 2) 31 | 32 | return obj, mse 33 | 34 | def fit(self, x, y, t): 35 | if x.shape[0] == 0: 36 | return 0 37 | 38 | # ---------------------------------------------------------------- 39 | # Seed 40 | # ---------------------------------------------------------------- 41 | np.random.seed(self.seed) 42 | 43 | # ---------------------------------------------------------------- 44 | # Verbosity? 
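
`adaptive_eval` above scores a node as n * (ATE)^2, the adaptive criterion of Athey and Imbens. A tiny worked example with made-up numbers:

```python
import numpy as np

y = np.array([3.0, 1.0, 2.0, 0.0])
t = np.array([1, 1, 0, 0])

ate = y[t == 1].mean() - y[t == 0].mean()  # (3+1)/2 - (2+0)/2 = 1.0
obj = y.shape[0] * ate ** 2                # 4 * 1.0**2 = 4.0, the node's objective
print(obj)
```
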
45 | # ---------------------------------------------------------------- 46 | 47 | # ---------------------------------------------------------------- 48 | # Split data 49 | # ---------------------------------------------------------------- 50 | 51 | self.root.num_samples = y.shape[0] 52 | # ---------------------------------------------------------------- 53 | # effect and pvals 54 | # ---------------------------------------------------------------- 55 | effect = tau_squared(y, t) 56 | p_val = get_pval(y, t) 57 | self.root.effect = effect 58 | self.root.p_val = p_val 59 | 60 | # ---------------------------------------------------------------- 61 | # Not sure if i should eval in root or not 62 | # ---------------------------------------------------------------- 63 | node_eval, mse = self.adaptive_eval(y, t) 64 | self.root.obj = node_eval 65 | 66 | # ---------------------------------------------------------------- 67 | # Add control/treatment means 68 | # ---------------------------------------------------------------- 69 | self.root.control_mean = np.mean(y[t == 0]) 70 | self.root.treatment_mean = np.mean(y[t == 1]) 71 | 72 | self.root.num_samples = x.shape[0] 73 | 74 | self._fit(self.root, x, y, t) 75 | 76 | def _fit(self, node: AdaptiveNode, train_x, train_y, train_t): 77 | 78 | if train_x.shape[0] == 0: 79 | return node 80 | 81 | if node.node_depth > self.tree_depth: 82 | self.tree_depth = node.node_depth 83 | 84 | if self.max_depth == self.tree_depth: 85 | if node.effect > self.max_effect: 86 | self.max_effect = node.effect 87 | if node.effect < self.min_effect: 88 | self.min_effect = node.effect 89 | self.num_leaves += 1 90 | node.leaf_num = self.num_leaves 91 | node.is_leaf = True 92 | return node 93 | 94 | best_gain = 0.0 95 | best_attributes = [] 96 | best_tb_obj, best_fb_obj = (0.0, 0.0) 97 | 98 | column_count = train_x.shape[1] 99 | for col in range(0, column_count): 100 | unique_vals = np.unique(train_x[:, col]) 101 | 102 | if self.max_values is not None: 103 | if self.max_values < 1: 104 | idx = np.round(np.linspace( 105 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 106 | unique_vals = unique_vals[idx] 107 | else: 108 | idx = np.round(np.linspace( 109 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 110 | unique_vals = unique_vals[idx] 111 | 112 | for value in unique_vals: 113 | 114 | # check training data size 115 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 116 | = divide_set(train_x, train_y, train_t, col, value) 117 | check1 = check_min_size(self.min_size, train_t1) 118 | check2 = check_min_size(self.min_size, train_t2) 119 | if check1 or check2: 120 | continue 121 | 122 | tb_eval, tb_mse = self.adaptive_eval(train_y1, train_t1) 123 | fb_eval, fb_mse = self.adaptive_eval(train_y2, train_t2) 124 | 125 | split_eval = (tb_eval + fb_eval) 126 | gain = -node.obj + split_eval 127 | 128 | if gain > best_gain: 129 | best_gain = gain 130 | best_attributes = [col, value] 131 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 132 | 133 | if best_gain > 0: 134 | node.col = best_attributes[0] 135 | node.value = best_attributes[1] 136 | 137 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 138 | = divide_set(train_x, train_y, train_t, node.col, node.value) 139 | 140 | y1 = train_y1 141 | y2 = train_y2 142 | t1 = train_t1 143 | t2 = train_t2 144 | 145 | best_tb_effect = ace(y1, t1) 146 | best_fb_effect = ace(y2, t2) 147 | tb_p_val = get_pval(y1, t1) 148 | fb_p_val = get_pval(y2, t2) 149 | 150 | self.obj = self.obj - 
node.obj + best_tb_obj + best_fb_obj 151 | 152 | # ---------------------------------------------------------------- 153 | # Ignore "mse" here, come back to it later? 154 | # ---------------------------------------------------------------- 155 | 156 | tb = AdaptiveNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 157 | node_depth=node.node_depth + 1, 158 | num_samples=y1.shape[0]) 159 | fb = AdaptiveNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 160 | node_depth=node.node_depth + 1, 161 | num_samples=y2.shape[0]) 162 | 163 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 164 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 165 | 166 | if node.effect > self.max_effect: 167 | self.max_effect = node.effect 168 | if node.effect < self.min_effect: 169 | self.min_effect = node.effect 170 | 171 | return node 172 | 173 | else: 174 | if node.effect > self.max_effect: 175 | self.max_effect = node.effect 176 | if node.effect < self.min_effect: 177 | self.min_effect = node.effect 178 | 179 | self.num_leaves += 1 180 | node.leaf_num = self.num_leaves 181 | node.is_leaf = True 182 | return node 183 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_match/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_match/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_match/ctl_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_match.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class BaseCausalTreeLearnNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class CTLMatchBase(CTLMatch): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = BaseCausalTreeLearnNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | # ---------------------------------------------------------------- 32 | # Verbosity? 
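
The CTL variants score splits on held-out data, so `fit` starts by carving a validation set out of the training data. A hedged sketch of that split on synthetic arrays (it mirrors the `train_test_split` call in `fit` just below):

```python
import numpy as np
from sklearn.model_selection import train_test_split

x = np.random.normal(size=(200, 5))
y = np.random.normal(size=200)
t = np.random.binomial(1, 0.5, size=200).astype(float)

# same unpacking order as the fit method: train/val for x, y, t in turn
train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(
    x, y, t, shuffle=True, test_size=0.5, random_state=724)
print(train_x.shape, val_x.shape)  # (100, 5) (100, 5)
```
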
33 | # ---------------------------------------------------------------- 34 | 35 | # ---------------------------------------------------------------- 36 | # Split data 37 | # ---------------------------------------------------------------- 38 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 39 | test_size=self.val_split) 40 | 41 | self.normalizer.fit(train_x) 42 | 43 | self.root.num_samples = y.shape[0] 44 | # ---------------------------------------------------------------- 45 | # effect and pvals 46 | # ---------------------------------------------------------------- 47 | effect = tau_squared(y, t) 48 | p_val = get_pval(y, t) 49 | self.root.effect = effect 50 | self.root.p_val = p_val 51 | 52 | # ---------------------------------------------------------------- 53 | # Not sure if i should eval in root or not 54 | # ---------------------------------------------------------------- 55 | node_eval, mse = self._eval(train_y, train_t, val_y, val_t) 56 | self.root.obj = node_eval 57 | 58 | # ---------------------------------------------------------------- 59 | # Add control/treatment means 60 | # ---------------------------------------------------------------- 61 | self.root.control_mean = np.mean(y[t == 0]) 62 | self.root.treatment_mean = np.mean(y[t == 1]) 63 | 64 | self.root.num_samples = x.shape[0] 65 | 66 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 67 | 68 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t, val_x, val_y, val_t): 69 | 70 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 71 | node.is_leaf = True 72 | return node 73 | 74 | if node.node_depth > self.tree_depth: 75 | self.tree_depth = node.node_depth 76 | 77 | if self.max_depth == self.tree_depth: 78 | self.num_leaves += 1 79 | node.leaf_num = self.num_leaves 80 | node.is_leaf = True 81 | return node 82 | 83 | best_gain = 0.0 84 | best_attributes = [] 85 | best_tb_obj, best_fb_obj = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | # ---------------------------------------------------------------- 92 | # TODO: Max values stuff 93 | # ---------------------------------------------------------------- 94 | 95 | # using the faster evaluation with vector/matrix calculations 96 | try: 97 | if self.feature_batch_size is None: 98 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, val_y, 99 | val_t, 100 | unique_vals, col) 101 | gain = -node.obj + split_obj 102 | if gain > best_gain: 103 | best_gain = gain 104 | best_attributes = [col, value] 105 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 106 | else: 107 | 108 | for x in batch(unique_vals, self.feature_batch_size): 109 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, 110 | val_y, val_t, x, col) 111 | 112 | gain = -node.obj + split_obj 113 | if gain > best_gain: 114 | best_gain = gain 115 | best_attributes = [col, value] 116 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 117 | # if that fails (due to memory maybe?) 
then use the old calculation 118 | except: 119 | for value in unique_vals: 120 | 121 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 122 | = divide_set(val_x, val_y, val_t, col, value) 123 | 124 | # check validation set size 125 | val_size = self.val_split * self.min_size if self.val_split * self.min_size > 2 else 2 126 | if check_min_size(val_size, val_t1) or check_min_size(val_size, val_t2): 127 | continue 128 | 129 | # check training data size 130 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 131 | = divide_set(train_x, train_y, train_t, col, value) 132 | check1 = check_min_size(self.min_size, train_t1) 133 | check2 = check_min_size(self.min_size, train_t2) 134 | if check1 or check2: 135 | continue 136 | 137 | tb_eval, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 138 | fb_eval, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 139 | 140 | split_eval = (tb_eval + fb_eval) 141 | gain = -node.obj + split_eval 142 | 143 | if gain > best_gain: 144 | best_gain = gain 145 | best_attributes = [col, value] 146 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 147 | 148 | if best_gain > 0: 149 | node.col = best_attributes[0] 150 | node.value = best_attributes[1] 151 | 152 | # print(node.col) 153 | 154 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 155 | = divide_set(train_x, train_y, train_t, node.col, node.value) 156 | 157 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 158 | = divide_set(val_x, val_y, val_t, node.col, node.value) 159 | 160 | y1 = np.concatenate((train_y1, val_y1)) 161 | y2 = np.concatenate((train_y2, val_y2)) 162 | t1 = np.concatenate((train_t1, val_t1)) 163 | t2 = np.concatenate((train_t2, val_t2)) 164 | 165 | best_tb_effect = ace(y1, t1) 166 | best_fb_effect = ace(y2, t2) 167 | tb_p_val = get_pval(y1, t1) 168 | fb_p_val = get_pval(y2, t2) 169 | 170 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 171 | 172 | # ---------------------------------------------------------------- 173 | # Ignore "mse" here, come back to it later? 
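
All of these `_fit` methods rely on `divide_set` (from `CTL/causal_tree/util.py`), which sends rows with `x[:, col] >= value` to the true branch. A quick check of that convention with made-up numbers:

```python
import numpy as np
from CTL.causal_tree.util import divide_set

x = np.arange(6, dtype=float).reshape(6, 1)
y = np.arange(6, dtype=float)
t = np.array([0.0, 1.0, 0.0, 1.0, 0.0, 1.0])

x1, x2, y1, y2, t1, t2 = divide_set(x, y, t, 0, 3.0)
print(x1.ravel())  # [3. 4. 5.]  true branch: x[:, 0] >= 3
print(x2.ravel())  # [0. 1. 2.]  false branch: x[:, 0] < 3
```
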
174 | # ---------------------------------------------------------------- 175 | 176 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 177 | node_depth=node.node_depth + 1, 178 | num_samples=y1.shape[0]) 179 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 180 | node_depth=node.node_depth + 1, 181 | num_samples=y2.shape[0]) 182 | 183 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 184 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 185 | 186 | if node.effect > self.max_effect: 187 | self.max_effect = node.effect 188 | if node.effect < self.min_effect: 189 | self.min_effect = node.effect 190 | 191 | return node 192 | 193 | else: 194 | if node.effect > self.max_effect: 195 | self.max_effect = node.effect 196 | if node.effect < self.min_effect: 197 | self.min_effect = node.effect 198 | 199 | self.num_leaves += 1 200 | node.leaf_num = self.num_leaves 201 | node.is_leaf = True 202 | return node 203 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_trigger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_trigger/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_trigger/adaptive_trigger.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_trigger.trigger_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class AdaptiveTriggerNode(TriggerNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class AdaptiveTriggerTree(TriggerTree): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = AdaptiveTriggerNode() 21 | 22 | def adaptive_eval(self, train_y, train_t): 23 | 24 | total_train = train_y.shape[0] 25 | return_val = (-np.inf, -np.inf, -np.inf) 26 | 27 | if total_train == 0: 28 | return return_val 29 | 30 | train_effect, best_trigger = tau_squared_trigger(train_y, train_t, self.min_size, self.quartile) 31 | 32 | if train_effect <= -np.inf: 33 | return return_val 34 | 35 | train_err = train_effect ** 2 36 | 37 | train_mse = total_train * train_err 38 | obj = train_mse 39 | 40 | best_obj = obj 41 | best_mse = train_err 42 | 43 | return best_obj, best_trigger, best_mse 44 | 45 | def fit(self, x, y, t): 46 | if x.shape[0] == 0: 47 | return 0 48 | 49 | # ---------------------------------------------------------------- 50 | # Seed 51 | # ---------------------------------------------------------------- 52 | np.random.seed(self.seed) 53 | 54 | # ---------------------------------------------------------------- 55 | # Verbosity? 
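# ----------------------------------------------------------------
# Note: in the trigger variants the treatment is continuous and a unit
# counts as treated when t >= trigger. adaptive_eval() above therefore
# scores a node as n * tau^2, with tau the best thresholded effect found
# by tau_squared_trigger. A rough standalone equivalent, assuming numpy
# only and that both groups are non-empty (illustration, not used here):
#
#     def node_score_sketch(y, t, trigger):
#         treated = t >= trigger
#         tau = np.mean(y[treated]) - np.mean(y[~treated])
#         return y.shape[0] * tau ** 2
# ----------------------------------------------------------------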
56 | # ---------------------------------------------------------------- 57 | 58 | # ---------------------------------------------------------------- 59 | # Split data 60 | # ---------------------------------------------------------------- 61 | 62 | self.root.num_samples = y.shape[0] 63 | # ---------------------------------------------------------------- 64 | # effect and pvals 65 | # ---------------------------------------------------------------- 66 | effect, trigger = tau_squared_trigger(y, t, self.min_size, self.quartile) 67 | p_val = get_pval_trigger(y, t, trigger) 68 | self.root.effect = effect 69 | self.root.p_val = p_val 70 | self.root.trigger = trigger 71 | 72 | # ---------------------------------------------------------------- 73 | # Not sure if i should eval in root or not 74 | # ---------------------------------------------------------------- 75 | node_eval, trigger, mse = self.adaptive_eval(y, t) 76 | self.root.obj = node_eval 77 | 78 | # ---------------------------------------------------------------- 79 | # Add control/treatment means 80 | # ---------------------------------------------------------------- 81 | self.root.control_mean = np.mean(y[t >= trigger]) 82 | self.root.treatment_mean = np.mean(y[t < trigger]) 83 | 84 | self.root.num_samples = x.shape[0] 85 | 86 | self._fit(self.root, x, y, t) 87 | 88 | def _fit(self, node: AdaptiveTriggerNode, train_x, train_y, train_t): 89 | 90 | if train_x.shape[0] == 0: 91 | return node 92 | 93 | if node.node_depth > self.tree_depth: 94 | self.tree_depth = node.node_depth 95 | 96 | if self.max_depth == self.tree_depth: 97 | if node.effect > self.max_effect: 98 | self.max_effect = node.effect 99 | if node.effect < self.min_effect: 100 | self.min_effect = node.effect 101 | self.num_leaves += 1 102 | node.leaf_num = self.num_leaves 103 | node.is_leaf = True 104 | return node 105 | 106 | best_gain = 0.0 107 | best_attributes = [] 108 | best_tb_obj, best_fb_obj = (0.0, 0.0) 109 | best_tb_trigger, best_fb_trigger = (0.0, 0.0) 110 | 111 | column_count = train_x.shape[1] 112 | for col in range(0, column_count): 113 | unique_vals = np.unique(train_x[:, col]) 114 | 115 | if self.max_values is not None: 116 | if self.max_values < 1: 117 | idx = np.round(np.linspace(0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 118 | unique_vals = unique_vals[idx] 119 | else: 120 | idx = np.round(np.linspace( 121 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 122 | unique_vals = unique_vals[idx] 123 | 124 | for value in unique_vals: 125 | 126 | # check training data size 127 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 128 | = divide_set(train_x, train_y, train_t, col, value) 129 | check1 = check_min_size(self.min_size, train_t1) 130 | check2 = check_min_size(self.min_size, train_t2) 131 | if check1 or check2: 132 | continue 133 | 134 | tb_eval, tb_trigger, tb_mse = self.adaptive_eval(train_y1, train_t1) 135 | fb_eval, fb_trigger, fb_mse = self.adaptive_eval(train_y2, train_t2) 136 | 137 | split_eval = (tb_eval + fb_eval) 138 | gain = -node.obj + split_eval 139 | 140 | if gain > best_gain: 141 | best_gain = gain 142 | best_attributes = [col, value] 143 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 144 | best_tb_trigger, best_fb_trigger = (tb_trigger, fb_trigger) 145 | 146 | if best_gain > 0: 147 | node.col = best_attributes[0] 148 | node.value = best_attributes[1] 149 | 150 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 151 | = divide_set(train_x, train_y, train_t, node.col, node.value) 
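# ----------------------------------------------------------------
# Review note: the child effects computed just below use ace(), which
# thresholds the treatment at 0.5. The otherwise-parallel
# TriggerTreeBase._fit (ctl_base_trigger.py) uses the per-branch trigger
# instead, which is likely the intended behavior for a continuous
# treatment:
#
#     best_tb_effect = ace_trigger(y1, t1, best_tb_trigger)
#     best_fb_effect = ace_trigger(y2, t2, best_fb_trigger)
#
# Relatedly, fit() above assigns control_mean = np.mean(y[t >= trigger])
# and treatment_mean = np.mean(y[t < trigger]); since ace_trigger treats
# t >= trigger as the treated group, those two assignments appear to be
# swapped.
# ----------------------------------------------------------------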
152 | 153 | y1 = train_y1 154 | y2 = train_y2 155 | t1 = train_t1 156 | t2 = train_t2 157 | 158 | best_tb_effect = ace(y1, t1) 159 | best_fb_effect = ace(y2, t2) 160 | tb_p_val = get_pval(y1, t1) 161 | fb_p_val = get_pval(y2, t2) 162 | 163 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 164 | 165 | # ---------------------------------------------------------------- 166 | # Ignore "mse" here, come back to it later? 167 | # ---------------------------------------------------------------- 168 | 169 | tb = AdaptiveTriggerNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 170 | node_depth=node.node_depth + 1, 171 | num_samples=y1.shape[0], trigger=best_tb_trigger) 172 | fb = AdaptiveTriggerNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 173 | node_depth=node.node_depth + 1, 174 | num_samples=y2.shape[0], trigger=best_fb_trigger) 175 | 176 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 177 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 178 | 179 | if node.effect > self.max_effect: 180 | self.max_effect = node.effect 181 | if node.effect < self.min_effect: 182 | self.min_effect = node.effect 183 | 184 | return node 185 | 186 | else: 187 | if node.effect > self.max_effect: 188 | self.max_effect = node.effect 189 | if node.effect < self.min_effect: 190 | self.min_effect = node.effect 191 | 192 | self.num_leaves += 1 193 | node.leaf_num = self.num_leaves 194 | node.is_leaf = True 195 | return node 196 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_trigger/ctl_base_trigger.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_trigger.trigger_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class TriggerBaseNode(TriggerNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | 11 | # ---------------------------------------------------------------- 12 | # Base causal tree (ctl, base objective) 13 | # ---------------------------------------------------------------- 14 | class TriggerTreeBase(TriggerTree): 15 | 16 | def __init__(self, **kwargs): 17 | super().__init__(**kwargs) 18 | self.root = TriggerBaseNode() 19 | 20 | def fit(self, x, y, t): 21 | if x.shape[0] == 0: 22 | return 0 23 | 24 | # ---------------------------------------------------------------- 25 | # Seed 26 | # ---------------------------------------------------------------- 27 | np.random.seed(self.seed) 28 | 29 | # ---------------------------------------------------------------- 30 | # Verbosity? 
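# ----------------------------------------------------------------
# Usage sketch for this class. The constructor arguments are assumed
# from the sibling tree classes in this package (PEHETree, SigTree);
# adjust to the actual TriggerTree signature if it differs:
#
#     import numpy as np
#     from CTL.causal_tree.ctl_trigger.ctl_base_trigger import TriggerTreeBase
#
#     rng = np.random.RandomState(0)
#     x = rng.randn(500, 5)
#     t = rng.rand(500)                    # continuous "treatment" dose
#     y = x[:, 0] + (t > 0.5) * x[:, 1] + 0.1 * rng.randn(500)
#
#     tree = TriggerTreeBase(min_size=5, max_depth=4)
#     tree.fit(x, y, t)
#     effects = tree.predict(x)            # per-example effect estimates
#     triggers = tree.get_triggers(x)      # per-example trigger thresholds
# ----------------------------------------------------------------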
31 | # ---------------------------------------------------------------- 32 | 33 | # ---------------------------------------------------------------- 34 | # Split data 35 | # ---------------------------------------------------------------- 36 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 37 | test_size=self.val_split) 38 | self.root.num_samples = y.shape[0] 39 | # ---------------------------------------------------------------- 40 | # effect and pvals 41 | # ---------------------------------------------------------------- 42 | effect, trigger = tau_squared_trigger(y, t, self.min_size, self.quartile) 43 | p_val = get_pval_trigger(y, t, trigger) 44 | self.root.effect = effect 45 | self.root.p_val = p_val 46 | self.root.trigger = trigger 47 | 48 | # ---------------------------------------------------------------- 49 | # Not sure if i should eval in root or not 50 | # ---------------------------------------------------------------- 51 | node_eval, trigger, mse = self._eval(train_y, train_t, val_y, val_t) 52 | self.root.obj = node_eval 53 | 54 | # ---------------------------------------------------------------- 55 | # Add control/treatment means 56 | # ---------------------------------------------------------------- 57 | self.root.control_mean = np.mean(y[t >= trigger]) 58 | self.root.treatment_mean = np.mean(y[t < trigger]) 59 | 60 | self.root.num_samples = x.shape[0] 61 | 62 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 63 | 64 | def _fit(self, node: TriggerBaseNode, train_x, train_y, train_t, val_x, val_y, val_t): 65 | 66 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 67 | return node 68 | 69 | if node.node_depth > self.tree_depth: 70 | self.tree_depth = node.node_depth 71 | 72 | if self.max_depth == self.tree_depth: 73 | if node.effect > self.max_effect: 74 | self.max_effect = node.effect 75 | if node.effect < self.min_effect: 76 | self.min_effect = node.effect 77 | self.num_leaves += 1 78 | node.leaf_num = self.num_leaves 79 | node.is_leaf = True 80 | return node 81 | 82 | best_gain = 0.0 83 | best_attributes = [] 84 | best_tb_obj, best_fb_obj = (0.0, 0.0) 85 | best_tb_trigger, best_fb_trigger = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | if self.max_values is not None: 92 | if self.max_values < 1: 93 | idx = np.round(np.linspace(0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 94 | unique_vals = unique_vals[idx] 95 | else: 96 | idx = np.round(np.linspace( 97 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 98 | unique_vals = unique_vals[idx] 99 | 100 | for value in unique_vals: 101 | 102 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 103 | = divide_set(val_x, val_y, val_t, col, value) 104 | 105 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 106 | = divide_set(train_x, train_y, train_t, col, value) 107 | 108 | tb_eval, tb_trigger, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 109 | fb_eval, fb_trigger, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 110 | 111 | split_eval = (tb_eval + fb_eval) 112 | gain = -node.obj + split_eval 113 | 114 | if gain > best_gain: 115 | best_gain = gain 116 | best_attributes = [col, value] 117 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 118 | best_tb_trigger, best_fb_trigger = (tb_trigger, fb_trigger) 119 | 120 | if best_gain > 0: 121 | node.col = best_attributes[0] 122 | node.value 
= best_attributes[1] 123 | 124 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 125 | = divide_set(train_x, train_y, train_t, node.col, node.value) 126 | 127 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 128 | = divide_set(val_x, val_y, val_t, node.col, node.value) 129 | 130 | y1 = np.concatenate((train_y1, val_y1)) 131 | y2 = np.concatenate((train_y2, val_y2)) 132 | t1 = np.concatenate((train_t1, val_t1)) 133 | t2 = np.concatenate((train_t2, val_t2)) 134 | 135 | best_tb_effect = ace_trigger(y1, t1, best_tb_trigger) 136 | best_fb_effect = ace_trigger(y2, t2, best_fb_trigger) 137 | tb_p_val = get_pval_trigger(y1, t1, best_tb_trigger) 138 | fb_p_val = get_pval_trigger(y2, t2, best_fb_trigger) 139 | 140 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 141 | 142 | # ---------------------------------------------------------------- 143 | # Ignore "mse" here, come back to it later? 144 | # ---------------------------------------------------------------- 145 | 146 | tb = TriggerBaseNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 147 | node_depth=node.node_depth + 1, 148 | num_samples=y1.shape[0], trigger=best_tb_trigger) 149 | fb = TriggerBaseNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 150 | node_depth=node.node_depth + 1, 151 | num_samples=y2.shape[0], trigger=best_fb_trigger) 152 | 153 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 154 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 155 | 156 | if node.effect > self.max_effect: 157 | self.max_effect = node.effect 158 | if node.effect < self.min_effect: 159 | self.min_effect = node.effect 160 | 161 | return node 162 | 163 | else: 164 | if node.effect > self.max_effect: 165 | self.max_effect = node.effect 166 | if node.effect < self.min_effect: 167 | self.min_effect = node.effect 168 | 169 | self.num_leaves += 1 170 | node.leaf_num = self.num_leaves 171 | node.is_leaf = True 172 | return node 173 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/balance_split.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.nn_pehe.tree import * 2 | 3 | 4 | class BaseNode(PEHENode): 5 | 6 | def __init__(self, **kwargs): 7 | super().__init__(**kwargs) 8 | 9 | # self.obj = obj 10 | 11 | 12 | # ---------------------------------------------------------------- 13 | # Base causal tree (ctl, base objective) 14 | # ---------------------------------------------------------------- 15 | class BalanceBasePEHE(PEHETree): 16 | 17 | def __init__(self, eval2=False, **kwargs): 18 | super().__init__(**kwargs) 19 | self.root = BaseNode() 20 | self.eval2 = eval2 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | self.root.num_samples = y.shape[0] 32 | 
self.num_training = y.shape[0] 33 | 34 | # ---------------------------------------------------------------- 35 | # NN_effect estimates 36 | # use the overall datasets for nearest neighbor for now 37 | # ---------------------------------------------------------------- 38 | nn_effect = self.compute_nn_effect(x, y, t, k=self.k) 39 | 40 | # ---------------------------------------------------------------- 41 | # effect and pvals 42 | # ---------------------------------------------------------------- 43 | effect = tau_squared(y, t) 44 | p_val = get_pval(y, t) 45 | self.root.effect = effect 46 | self.root.p_val = p_val 47 | 48 | # ---------------------------------------------------------------- 49 | # Not sure if i should eval in root or not 50 | # ---------------------------------------------------------------- 51 | nn_pehe = self._eval(y, t, nn_effect) 52 | self.root.pehe = nn_pehe 53 | self.pehe = self.root.pehe 54 | 55 | # ---------------------------------------------------------------- 56 | # Add control/treatment means 57 | # ---------------------------------------------------------------- 58 | self.root.control_mean = np.mean(y[t == 0]) 59 | self.root.treatment_mean = np.mean(y[t == 1]) 60 | 61 | self.root.num_samples = x.shape[0] 62 | 63 | self._fit(self.root, x, y, t, nn_effect) 64 | 65 | if self.num_leaves > 0: 66 | self.pehe = self.pehe / self.num_leaves 67 | 68 | def _eval(self, train_y, train_t, nn_effect): 69 | 70 | # treated = np.where(train_t == 1)[0] 71 | # control = np.where(train_t == 0)[0] 72 | # pred_effect = np.mean(train_y[treated]) - np.mean(train_y[control]) 73 | pred_effect = ace(train_y, train_t) 74 | 75 | # nn_pehe = np.mean((nn_effect - pred_effect) ** 2) 76 | nn_pehe = np.sum((nn_effect - pred_effect) ** 2) 77 | 78 | return nn_pehe 79 | 80 | def _fit(self, node: BaseNode, train_x, train_y, train_t, nn_effect): 81 | 82 | if train_x.shape[0] == 0: 83 | return node 84 | 85 | if node.node_depth > self.tree_depth: 86 | self.tree_depth = node.node_depth 87 | 88 | if self.max_depth == self.tree_depth: 89 | self.num_leaves += 1 90 | node.leaf_num = self.num_leaves 91 | node.is_leaf = True 92 | return node 93 | 94 | # print(self.tree_depth, self.obj) 95 | 96 | best_gain = 0.0 97 | # best_gain = node.pehe # min amount 98 | best_attributes = [] 99 | best_tb_obj, best_fb_obj = (0.0, 0.0) 100 | 101 | column_count = train_x.shape[1] 102 | for col in range(0, column_count): 103 | unique_vals = np.unique(train_x[:, col]) 104 | 105 | for value in unique_vals: 106 | # check training data size 107 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 108 | = divide_set(train_x, train_y, train_t, col, value) 109 | check1 = check_min_size(self.min_size, train_t1) 110 | check2 = check_min_size(self.min_size, train_t2) 111 | if check1 or check2: 112 | continue 113 | (_, _, nn_effect1, nn_effect2, _, _) \ 114 | = divide_set(train_x, nn_effect, train_t, col, value) 115 | 116 | tb_eval = self._eval(train_y1, train_t1, nn_effect1) 117 | fb_eval = self._eval(train_y2, train_t2, nn_effect2) 118 | 119 | split_difference = np.abs(tb_eval - fb_eval) 120 | 121 | split_eval = (tb_eval + fb_eval) 122 | gain = node.pehe - split_eval - split_difference 123 | 124 | if gain > best_gain: 125 | best_gain = gain 126 | best_attributes = [col, value] 127 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 128 | # if self.eval2: 129 | # split_eval, value, tb_eval, fb_eval = self._eval2(unique_vals, train_x, train_y, train_t, nn_effect, 130 | # col, node.pehe) 131 | # 132 | # gain = node.pehe - split_eval 
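# ----------------------------------------------------------------
# Note on the gain used above: unlike BasePEHE, this balance variant
# subtracts the branch imbalance |tb_eval - fb_eval| on top of the
# combined child PEHE, so splits whose two branches carry very different
# PEHE mass are penalized. Sketch of the scoring rule:
#
#     split_eval = tb_eval + fb_eval              # combined child PEHE
#     balance_penalty = abs(tb_eval - fb_eval)    # branch imbalance
#     gain = parent_pehe - split_eval - balance_penalty
# ----------------------------------------------------------------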
133 | # 134 | # if gain > best_gain: 135 | # best_gain = gain 136 | # best_attributes = [col, value] 137 | # best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 138 | # else: 139 | # for value in unique_vals: 140 | # # check training data size 141 | # (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 142 | # = divide_set(train_x, train_y, train_t, col, value) 143 | # check1 = check_min_size(self.min_size, train_t1) 144 | # check2 = check_min_size(self.min_size, train_t2) 145 | # if check1 or check2: 146 | # continue 147 | # (_, _, nn_effect1, nn_effect2, _, _) \ 148 | # = divide_set(train_x, nn_effect, train_t, col, value) 149 | # 150 | # tb_eval = self._eval(train_y1, train_t1, nn_effect1) 151 | # fb_eval = self._eval(train_y2, train_t2, nn_effect2) 152 | # 153 | # split_eval = (tb_eval + fb_eval) 154 | # gain = node.pehe - split_eval 155 | # 156 | # if gain > best_gain: 157 | # best_gain = gain 158 | # best_attributes = [col, value] 159 | # best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 160 | 161 | if best_gain > 0: 162 | node.col = best_attributes[0] 163 | node.value = best_attributes[1] 164 | 165 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 166 | = divide_set(train_x, train_y, train_t, node.col, node.value) 167 | (_, _, nn_effect1, nn_effect2, _, _) \ 168 | = divide_set(train_x, nn_effect, train_t, node.col, node.value) 169 | 170 | y1 = train_y1 171 | y2 = train_y2 172 | t1 = train_t1 173 | t2 = train_t2 174 | 175 | best_tb_effect = ace(y1, t1) 176 | best_fb_effect = ace(y2, t2) 177 | tb_p_val = get_pval(y1, t1) 178 | fb_p_val = get_pval(y2, t2) 179 | 180 | self.pehe = self.pehe - node.pehe + best_tb_obj + best_fb_obj 181 | 182 | tb = BaseNode(obj=best_tb_obj, pehe=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 183 | node_depth=node.node_depth + 1, 184 | num_samples=y1.shape[0]) 185 | fb = BaseNode(obj=best_fb_obj, pehe=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 186 | node_depth=node.node_depth + 1, 187 | num_samples=y2.shape[0]) 188 | 189 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, nn_effect1) 190 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, nn_effect2) 191 | 192 | if node.effect > self.max_effect: 193 | self.max_effect = node.effect 194 | if node.effect < self.min_effect: 195 | self.min_effect = node.effect 196 | 197 | return node 198 | 199 | else: 200 | if node.effect > self.max_effect: 201 | self.max_effect = node.effect 202 | if node.effect < self.min_effect: 203 | self.min_effect = node.effect 204 | 205 | self.num_leaves += 1 206 | node.leaf_num = self.num_leaves 207 | node.is_leaf = True 208 | return node 209 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/honest.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.nn_pehe.tree import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class HonestNode(PEHENode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class HonestPEHE(PEHETree): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = HonestNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # 
---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | # ---------------------------------------------------------------- 32 | # Split data 33 | # ---------------------------------------------------------------- 34 | x, est_x, y, est_y, t, est_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 35 | test_size=0.5) 36 | self.root.num_samples = est_y.shape[0] 37 | self.num_training = y.shape[0] 38 | 39 | # ---------------------------------------------------------------- 40 | # NN_effect estimates 41 | # use the overall datasets for nearest neighbor for now 42 | # ---------------------------------------------------------------- 43 | nn_effect = compute_nn_effect(x, y, t, k=self.k) 44 | # val_nn_effect = compute_nn_effect(est_x, est_y, est_t, k=self.k) 45 | 46 | # ---------------------------------------------------------------- 47 | # effect and pvals 48 | # ---------------------------------------------------------------- 49 | effect = tau_squared(y, t) 50 | p_val = get_pval(y, t) 51 | self.root.effect = effect 52 | self.root.p_val = p_val 53 | 54 | # ---------------------------------------------------------------- 55 | # Not sure if i should eval in root or not 56 | # ---------------------------------------------------------------- 57 | nn_pehe = self._eval(y, t, nn_effect) 58 | self.root.obj = nn_pehe 59 | self.obj = self.root.obj 60 | 61 | # ---------------------------------------------------------------- 62 | # Add control/treatment means 63 | # ---------------------------------------------------------------- 64 | self.root.control_mean = np.mean(y[t == 0]) 65 | self.root.treatment_mean = np.mean(y[t == 1]) 66 | 67 | self.root.num_samples = x.shape[0] 68 | 69 | self._fit(self.root, x, y, t, nn_effect, est_x, est_y, est_t) 70 | 71 | if self.num_leaves > 0: 72 | self.obj = self.obj / self.num_leaves 73 | 74 | def _eval(self, train_y, train_t, nn_effect): 75 | 76 | # total_train = train_y.shape[0] 77 | 78 | # treated = np.where(train_t == 1)[0] 79 | # control = np.where(train_t == 0)[0] 80 | # pred_effect = np.mean(train_y[treated]) - np.mean(train_y[control]) 81 | pred_effect = ace(train_y, train_t) 82 | 83 | # nn_pehe = np.mean((nn_effect - pred_effect) ** 2) 84 | nn_pehe = np.sum((nn_effect - pred_effect) ** 2) 85 | 86 | # val_effect = ace(val_y, val_t) 87 | # val_nn_pehe = np.sum((val_nn_effect - pred_effect) ** 2) 88 | # val_train_ratio = total_train / total_val 89 | # val_nn_pehe = val_nn_pehe * val_train_ratio 90 | # pehe_diff = np.abs(nn_pehe - val_nn_pehe) 91 | 92 | # cost = np.abs(total_train * pred_effect - total_train * val_effect) 93 | 94 | var_t, var_c = variance(train_y, train_t) 95 | 96 | return nn_pehe 97 | 98 | def _fit(self, node: HonestNode, train_x, train_y, train_t, nn_effect, est_x, est_y, est_t): 99 | 100 | if train_x.shape[0] == 0: 101 | return node 102 | 103 | if node.node_depth > self.tree_depth: 104 | self.tree_depth = node.node_depth 105 | 106 | if self.max_depth == self.tree_depth: 107 | if node.effect > self.max_effect: 108 | self.max_effect = node.effect 109 | if node.effect < self.min_effect: 110 | self.min_effect = node.effect 111 | self.num_leaves += 1 112 | node.leaf_num = self.num_leaves 113 | node.is_leaf = True 114 | return node 115 | 116 | # print(self.tree_depth, self.obj) 117 | 118 | best_gain = 0.0 119 | best_attributes = [] 120 | best_tb_obj, best_fb_obj = (0.0, 0.0) 121 | 122 | column_count = 
train_x.shape[1] 123 | for col in range(0, column_count): 124 | unique_vals = np.unique(train_x[:, col]) 125 | 126 | for value in unique_vals: 127 | (est_x1, est_x2, est_y1, est_y2, est_t1, est_t2) \ 128 | = divide_set(est_x, est_y, est_t, col, value) 129 | 130 | # check est set size 131 | if check_min_size(self.min_size, est_t1) or check_min_size(self.min_size, est_t2): 132 | continue 133 | 134 | # check training data size 135 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 136 | = divide_set(train_x, train_y, train_t, col, value) 137 | check1 = check_min_size(self.min_size, train_t1) 138 | check2 = check_min_size(self.min_size, train_t2) 139 | if check1 or check2: 140 | continue 141 | (_, _, nn_effect1, nn_effect2, _, _) \ 142 | = divide_set(train_x, nn_effect, train_t, col, value) 143 | 144 | tb_eval = self._eval(train_y1, train_t1, nn_effect1) 145 | fb_eval = self._eval(train_y2, train_t2, nn_effect2) 146 | 147 | split_eval = (tb_eval + fb_eval) 148 | gain = node.obj - split_eval 149 | 150 | if gain > best_gain: 151 | best_gain = gain 152 | best_attributes = [col, value] 153 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 154 | 155 | # print(tb_eval, fb_eval, gain, best_gain) 156 | 157 | if best_gain > 0: 158 | node.col = best_attributes[0] 159 | node.value = best_attributes[1] 160 | 161 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 162 | = divide_set(train_x, train_y, train_t, node.col, node.value) 163 | (est_x1, est_x2, est_y1, est_y2, est_t1, est_t2) \ 164 | = divide_set(est_x, est_y, est_t, node.col, node.value) 165 | (_, _, nn_effect1, nn_effect2, _, _) \ 166 | = divide_set(train_x, nn_effect, train_t, node.col, node.value) 167 | 168 | # y1 = train_y1 169 | # y2 = train_y2 170 | # t1 = train_t1 171 | # t2 = train_t2 172 | # y1 = np.concatenate((train_y1, val_y1)) 173 | # y2 = np.concatenate((train_y2, val_y2)) 174 | # t1 = np.concatenate((train_t1, val_t1)) 175 | # t2 = np.concatenate((train_t2, val_t2)) 176 | y1 = est_y1 177 | y2 = est_y2 178 | t1 = est_t1 179 | t2 = est_t2 180 | 181 | best_tb_effect = ace(y1, t1) 182 | best_fb_effect = ace(y2, t2) 183 | tb_p_val = get_pval(y1, t1) 184 | fb_p_val = get_pval(y2, t2) 185 | 186 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 187 | 188 | tb = HonestNode(obj=best_tb_obj, pehe=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 189 | node_depth=node.node_depth + 1, 190 | num_samples=train_y1.shape[0]) 191 | fb = HonestNode(obj=best_fb_obj, pehe=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 192 | node_depth=node.node_depth + 1, 193 | num_samples=train_y2.shape[0]) 194 | 195 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, nn_effect1, est_x1, est_y1, est_t1) 196 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, nn_effect2, est_x2, est_y2, est_t2) 197 | 198 | if node.effect > self.max_effect: 199 | self.max_effect = node.effect 200 | if node.effect < self.min_effect: 201 | self.min_effect = node.effect 202 | 203 | return node 204 | 205 | else: 206 | if node.effect > self.max_effect: 207 | self.max_effect = node.effect 208 | if node.effect < self.min_effect: 209 | self.min_effect = node.effect 210 | 211 | self.num_leaves += 1 212 | node.leaf_num = self.num_leaves 213 | node.is_leaf = True 214 | return node 215 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/tree.py: -------------------------------------------------------------------------------- 1 | try: 2 
| from CTL.causal_tree.util_c import * 3 | except: 4 | from CTL.causal_tree.util import * 5 | from CTL.causal_tree.ct import * 6 | import numpy as np 7 | from scipy.spatial import cKDTree 8 | 9 | 10 | # TODO: Add weighting on evaluations 11 | # TODO: add weighting on k > 1 nearest neighbors? 12 | 13 | def compute_nn_effect(x, y, t, k=1): 14 | kdtree = cKDTree(x) 15 | d, idx = kdtree.query(x, k=x.shape[0]) 16 | idx = idx[:, 1:] 17 | treated = np.where(t == 1)[0] 18 | control = np.where(t == 0)[0] 19 | bool_treated = np.isin(idx, treated) 20 | bool_control = np.isin(idx, control) 21 | 22 | nn_effect = np.zeros(x.shape[0]) 23 | for i in range(len(bool_treated)): 24 | i_treat_idx = np.where(bool_treated[i, :])[0][:k] 25 | i_control_idx = np.where(bool_control[i, :])[0][:k] 26 | 27 | i_treat_nn = y[idx[i, i_treat_idx]] 28 | i_cont_nn = y[idx[i, i_control_idx]] 29 | 30 | nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn) 31 | 32 | return nn_effect 33 | 34 | 35 | class PEHENode(CTNode): 36 | 37 | def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1, 38 | is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0, pehe=0.0): 39 | super().__init__() 40 | # not tree specific features (most likely added at creation) 41 | self.p_val = p_val 42 | self.effect = effect 43 | self.node_depth = node_depth 44 | self.control_mean = control_mean 45 | self.treatment_mean = treatment_mean 46 | 47 | # during tree building 48 | self.obj = obj 49 | self.num_samples = num_samples 50 | self.pehe = pehe 51 | 52 | # after building tree 53 | self.col = col 54 | self.value = value 55 | self.is_leaf = is_leaf 56 | self.leaf_num = leaf_num 57 | self.true_branch = None 58 | self.false_branch = None 59 | 60 | # after calling functions 61 | self.column_name = "" 62 | self.decision = "" 63 | 64 | 65 | class PEHETree(CausalTree): 66 | 67 | def __init__(self, split_size=0.5, max_depth=-1, min_size=2, max_values=None, verbose=False, 68 | k=1, use_propensity=False, propensity_model=None, 69 | seed=724): 70 | super().__init__() 71 | self.val_split = split_size 72 | self.max_depth = max_depth 73 | self.min_size = min_size 74 | self.seed = seed 75 | 76 | self.max_values = max_values 77 | self.verbose = verbose 78 | 79 | self.max_effect = 0.0 80 | self.min_effect = 0.0 81 | 82 | self.features = None 83 | 84 | self.k = k 85 | self.num_training = 1 86 | self.pehe = 0 87 | self.use_propensity = use_propensity 88 | if use_propensity: 89 | if propensity_model is not None: 90 | self.proensity_model = propensity_model 91 | else: 92 | from sklearn.linear_model import LogisticRegression 93 | self.proensity_model = LogisticRegression() 94 | 95 | self.root = PEHENode() 96 | 97 | def compute_nn_effect(self, x, y, t, k=1): 98 | if self.use_propensity: 99 | self.proensity_model.fit(x, t) 100 | propensity = self.proensity_model.predict_proba(x)[:, 1:] 101 | kdtree = cKDTree(propensity) 102 | _, idx = kdtree.query(propensity, k=x.shape[0]) 103 | else: 104 | kdtree = cKDTree(x) 105 | _, idx = kdtree.query(x, k=x.shape[0]) 106 | idx = idx[:, 1:] 107 | treated = np.where(t == 1)[0] 108 | control = np.where(t == 0)[0] 109 | bool_treated = np.isin(idx, treated) 110 | bool_control = np.isin(idx, control) 111 | 112 | nn_effect = np.zeros(x.shape) 113 | for i in range(len(bool_treated)): 114 | i_treat_idx = np.where(bool_treated[i, :])[0][:k] 115 | i_control_idx = np.where(bool_control[i, :])[0][:k] 116 | 117 | i_treat_nn = y[idx[i, i_treat_idx]] 118 | i_cont_nn = y[idx[i, i_control_idx]] 119 | 120 | 
nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn) 121 | 122 | return nn_effect 123 | 124 | @abstractmethod 125 | def fit(self, x, y, t): 126 | pass 127 | 128 | def predict(self, x): 129 | 130 | def _predict(node: PEHENode, observation): 131 | if node.is_leaf: 132 | return node.effect 133 | else: 134 | v = observation[node.col] 135 | if v >= node.value: 136 | branch = node.true_branch 137 | else: 138 | branch = node.false_branch 139 | 140 | return _predict(branch, observation) 141 | 142 | if len(x.shape) == 1: 143 | prediction = _predict(self.root, x) 144 | return prediction 145 | 146 | num_test = x.shape[0] 147 | 148 | prediction = np.zeros(num_test) 149 | 150 | for i in range(num_test): 151 | test_example = x[i, :] 152 | prediction[i] = _predict(self.root, test_example) 153 | 154 | return prediction 155 | 156 | def get_groups(self, x): 157 | 158 | def _get_group(node: PEHENode, observation): 159 | if node.is_leaf: 160 | return node.leaf_num 161 | else: 162 | v = observation[node.col] 163 | if v >= node.value: 164 | branch = node.true_branch 165 | else: 166 | branch = node.false_branch 167 | 168 | return _get_group(branch, observation) 169 | 170 | if len(x.shape) == 1: 171 | return _get_group(self.root, x) 172 | num_test = x.shape[0] 173 | leaf_results = np.zeros(num_test) 174 | 175 | for i in range(num_test): 176 | test_example = x[i, :] 177 | leaf_results[i] = _get_group(self.root, test_example) 178 | 179 | return leaf_results 180 | 181 | def get_features(self, x): 182 | 183 | def _get_features(node: PEHENode, observation, features): 184 | if node.is_leaf: 185 | return features 186 | else: 187 | v = observation[node.col] 188 | if v >= node.value: 189 | branch = node.true_branch 190 | else: 191 | branch = node.false_branch 192 | 193 | features.append(node.decision) 194 | return _get_features(branch, observation, features) 195 | 196 | if len(x.shape) == 1: 197 | features = [] 198 | return _get_features(self.root, x, features) 199 | num_test = x.shape[0] 200 | leaf_features = [] 201 | 202 | for i in range(num_test): 203 | features = [] 204 | test_example = x[i, :] 205 | leaf_features.append(_get_features(self.root, test_example, features)) 206 | 207 | return leaf_features 208 | 209 | def prune(self, alpha=0.05): 210 | 211 | def _prune(node: PEHENode): 212 | if node.true_branch is None or node.false_branch is None: 213 | return 214 | 215 | # recursive call for each branch 216 | if not node.true_branch.is_leaf: 217 | _prune(node.true_branch) 218 | if not node.false_branch.is_leaf: 219 | _prune(node.false_branch) 220 | 221 | # merge leaves (potentially) 222 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 223 | # Get branches 224 | tb = node.true_branch 225 | fb = node.false_branch 226 | 227 | tb_pval = tb.p_val 228 | fb_pval = fb.p_val 229 | 230 | if tb_pval > alpha and fb_pval > alpha: 231 | node.leaf_num = node.true_branch.leaf_num 232 | node.true_branch = None 233 | node.false_branch = None 234 | self.num_leaves = self.num_leaves - 1 235 | node.is_leaf = True 236 | 237 | # ---------------------------------------------------------------- 238 | # Something about obj/mse? if that is added 239 | # 240 | # - can do a self function so that tree references itself/it's own type of node? 
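# ----------------------------------------------------------------
# Pruning rule used here: a pair of sibling leaves is merged back into
# their parent only when neither leaf's treatment-effect t-test is
# significant, i.e. both p-values exceed alpha. For example, with
# alpha = 0.05, sibling leaves with p = 0.30 and p = 0.12 collapse into
# the parent, while p = 0.30 and p = 0.01 stay split.
# ----------------------------------------------------------------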
241 | # ---------------------------------------------------------------- 242 | if tb.node_depth == self.tree_depth: 243 | self.tree_depth = self.tree_depth - 1 244 | 245 | _prune(self.root) 246 | 247 | def get_triggers(self, x): 248 | pass 249 | 250 | def save(self, filename): 251 | import pickle as pkl 252 | 253 | check_dir(filename) 254 | with open(filename, "wb") as file: 255 | pkl.dump(self, file) 256 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/r_tree/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/r_tree/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/r_tree/tree.py: -------------------------------------------------------------------------------- 1 | try: 2 | from CTL.causal_tree.util_c import * 3 | except: 4 | from CTL.causal_tree.util import * 5 | from CTL.causal_tree.ct import * 6 | import numpy as np 7 | from scipy.spatial import cKDTree 8 | 9 | 10 | # TODO: Add weighting on evaluations 11 | # TODO: add weighting on k > 1 nearest neighbors? 12 | 13 | def compute_nn_effect(x, y, t, k=1): 14 | kdtree = cKDTree(x) 15 | d, idx = kdtree.query(x, k=x.shape[0]) 16 | idx = idx[:, 1:] 17 | treated = np.where(t == 1)[0] 18 | control = np.where(t == 0)[0] 19 | bool_treated = np.isin(idx, treated) 20 | bool_control = np.isin(idx, control) 21 | 22 | nn_effect = np.zeros(x.shape) 23 | for i in range(len(bool_treated)): 24 | i_treat_idx = np.where(bool_treated[i, :])[0][:k] 25 | i_control_idx = np.where(bool_control[i, :])[0][:k] 26 | 27 | i_treat_nn = y[idx[i, i_treat_idx]] 28 | i_cont_nn = y[idx[i, i_control_idx]] 29 | 30 | nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn) 31 | 32 | return nn_effect 33 | 34 | 35 | class RNode(CTNode): 36 | 37 | def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1, 38 | is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0, pehe=0.0): 39 | super().__init__() 40 | # not tree specific features (most likely added at creation) 41 | self.p_val = p_val 42 | self.effect = effect 43 | self.node_depth = node_depth 44 | self.control_mean = control_mean 45 | self.treatment_mean = treatment_mean 46 | 47 | # during tree building 48 | self.obj = obj 49 | self.num_samples = num_samples 50 | self.pehe = pehe 51 | 52 | # after building tree 53 | self.col = col 54 | self.value = value 55 | self.is_leaf = is_leaf 56 | self.leaf_num = leaf_num 57 | self.true_branch = None 58 | self.false_branch = None 59 | 60 | # after calling functions 61 | self.column_name = "" 62 | self.decision = "" 63 | 64 | 65 | class RTree(CausalTree): 66 | 67 | def __init__(self, split_size=0.5, max_depth=-1, min_size=2, max_values=None, verbose=False, 68 | k=1, use_propensity=False, propensity_model=None, 69 | seed=724): 70 | super().__init__() 71 | self.val_split = split_size 72 | self.max_depth = max_depth 73 | self.min_size = min_size 74 | self.seed = seed 75 | 76 | self.max_values = max_values 77 | self.verbose = verbose 78 | 79 | self.max_effect = 0.0 80 | self.min_effect = 0.0 81 | 82 | self.features = None 83 | 84 | self.k = k 85 | self.num_training = 1 86 | self.pehe = 0 87 | self.use_propensity = use_propensity 88 | if use_propensity: 89 | if 
propensity_model is not None: 90 | self.proensity_model = propensity_model 91 | else: 92 | from sklearn.linear_model import LogisticRegression 93 | self.proensity_model = LogisticRegression() 94 | 95 | self.root = RNode() 96 | 97 | def compute_nn_effect(self, x, y, t, k=1): 98 | if self.use_propensity: 99 | self.proensity_model.fit(x, t) 100 | propensity = self.proensity_model.predict_proba(x)[:, 1:] 101 | kdtree = cKDTree(propensity) 102 | _, idx = kdtree.query(propensity, k=x.shape[0]) 103 | else: 104 | kdtree = cKDTree(x) 105 | _, idx = kdtree.query(x, k=x.shape[0]) 106 | idx = idx[:, 1:] 107 | treated = np.where(t == 1)[0] 108 | control = np.where(t == 0)[0] 109 | bool_treated = np.isin(idx, treated) 110 | bool_control = np.isin(idx, control) 111 | 112 | nn_effect = np.zeros(x.shape) 113 | for i in range(len(bool_treated)): 114 | i_treat_idx = np.where(bool_treated[i, :])[0][:k] 115 | i_control_idx = np.where(bool_control[i, :])[0][:k] 116 | 117 | i_treat_nn = y[idx[i, i_treat_idx]] 118 | i_cont_nn = y[idx[i, i_control_idx]] 119 | 120 | nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn) 121 | 122 | return nn_effect 123 | 124 | @abstractmethod 125 | def fit(self, x, y, t): 126 | pass 127 | 128 | def predict(self, x): 129 | 130 | def _predict(node: PEHENode, observation): 131 | if node.is_leaf: 132 | return node.effect 133 | else: 134 | v = observation[node.col] 135 | if v >= node.value: 136 | branch = node.true_branch 137 | else: 138 | branch = node.false_branch 139 | 140 | return _predict(branch, observation) 141 | 142 | if len(x.shape) == 1: 143 | prediction = _predict(self.root, x) 144 | return prediction 145 | 146 | num_test = x.shape[0] 147 | 148 | prediction = np.zeros(num_test) 149 | 150 | for i in range(num_test): 151 | test_example = x[i, :] 152 | prediction[i] = _predict(self.root, test_example) 153 | 154 | return prediction 155 | 156 | def get_groups(self, x): 157 | 158 | def _get_group(node: PEHENode, observation): 159 | if node.is_leaf: 160 | return node.leaf_num 161 | else: 162 | v = observation[node.col] 163 | if v >= node.value: 164 | branch = node.true_branch 165 | else: 166 | branch = node.false_branch 167 | 168 | return _get_group(branch, observation) 169 | 170 | if len(x.shape) == 1: 171 | return _get_group(self.root, x) 172 | num_test = x.shape[0] 173 | leaf_results = np.zeros(num_test) 174 | 175 | for i in range(num_test): 176 | test_example = x[i, :] 177 | leaf_results[i] = _get_group(self.root, test_example) 178 | 179 | return leaf_results 180 | 181 | def get_features(self, x): 182 | 183 | def _get_features(node: PEHENode, observation, features): 184 | if node.is_leaf: 185 | return features 186 | else: 187 | v = observation[node.col] 188 | if v >= node.value: 189 | branch = node.true_branch 190 | else: 191 | branch = node.false_branch 192 | 193 | features.append(node.decision) 194 | return _get_features(branch, observation, features) 195 | 196 | if len(x.shape) == 1: 197 | features = [] 198 | return _get_features(self.root, x, features) 199 | num_test = x.shape[0] 200 | leaf_features = [] 201 | 202 | for i in range(num_test): 203 | features = [] 204 | test_example = x[i, :] 205 | leaf_features.append(_get_features(self.root, test_example, features)) 206 | 207 | return leaf_features 208 | 209 | def prune(self, alpha=0.05): 210 | 211 | def _prune(node: PEHENode): 212 | if node.true_branch is None or node.false_branch is None: 213 | return 214 | 215 | # recursive call for each branch 216 | if not node.true_branch.is_leaf: 217 | _prune(node.true_branch) 218 | if 
not node.false_branch.is_leaf: 219 | _prune(node.false_branch) 220 | 221 | # merge leaves (potentially) 222 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 223 | # Get branches 224 | tb = node.true_branch 225 | fb = node.false_branch 226 | 227 | tb_pval = tb.p_val 228 | fb_pval = fb.p_val 229 | 230 | if tb_pval > alpha and fb_pval > alpha: 231 | node.leaf_num = node.true_branch.leaf_num 232 | node.true_branch = None 233 | node.false_branch = None 234 | self.num_leaves = self.num_leaves - 1 235 | node.is_leaf = True 236 | 237 | # ---------------------------------------------------------------- 238 | # Something about obj/mse? if that is added 239 | # 240 | # - can do a self function so that tree references itself/it's own type of node? 241 | # ---------------------------------------------------------------- 242 | if tb.node_depth == self.tree_depth: 243 | self.tree_depth = self.tree_depth - 1 244 | 245 | _prune(self.root) 246 | 247 | def get_triggers(self, x): 248 | pass 249 | 250 | def save(self, filename): 251 | import pickle as pkl 252 | 253 | check_dir(filename) 254 | with open(filename, "wb") as file: 255 | pkl.dump(self, file) 256 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/sig_diff/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/sig_diff/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/sig_diff/sig.py: -------------------------------------------------------------------------------- 1 | # from CTL.causal_tree.util import * 2 | try: 3 | from CTL.causal_tree.util_c import * 4 | except: 5 | from CTL.causal_tree.util import * 6 | from CTL.causal_tree.ct import * 7 | import numpy as np 8 | from scipy.stats import ttest_ind_from_stats 9 | 10 | 11 | class SigNode(CTNode): 12 | 13 | def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1, 14 | is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0): 15 | super().__init__() 16 | # not tree specific features (most likely added at creation) 17 | self.p_val = p_val 18 | self.effect = effect 19 | self.node_depth = node_depth 20 | self.control_mean = control_mean 21 | self.treatment_mean = treatment_mean 22 | 23 | # during tree building 24 | self.obj = obj 25 | self.num_samples = num_samples 26 | 27 | # after building tree 28 | self.col = col 29 | self.value = value 30 | self.is_leaf = is_leaf 31 | self.leaf_num = leaf_num 32 | self.true_branch = None 33 | self.false_branch = None 34 | 35 | # after calling functions 36 | self.column_name = "" 37 | self.decision = "" 38 | 39 | 40 | class SigTree(CausalTree): 41 | 42 | def __init__(self, alpha=0.05, max_depth=-1, min_size=2, seed=724, max_values=None, verbose=False): 43 | super().__init__() 44 | self.alpha = 0.05 45 | self.max_depth = max_depth 46 | self.min_size = min_size 47 | self.seed = seed 48 | 49 | self.max_values = max_values 50 | self.verbose = verbose 51 | 52 | self.max_effect = 0.0 53 | self.min_effect = 0.0 54 | 55 | self.features = None 56 | 57 | self.root = SigNode() 58 | 59 | @abstractmethod 60 | def fit(self, x, y, t): 61 | pass 62 | 63 | def _eval_util(self, train_y, train_t): 64 | var_t, var_c = variance(train_y, train_t) 65 | std = 
np.sqrt(var_t) + np.sqrt(var_c) 66 | effect = ace(train_y, train_t) 67 | 68 | return effect, std 69 | 70 | def _eval(self, y_train1, t_train1, y_train2, t_train2): 71 | 72 | total1 = y_train1.shape[0] 73 | total2 = y_train2.shape[0] 74 | 75 | return_val = (1, 1) 76 | if total1 < 1 or total2 < 1: 77 | return return_val 78 | 79 | effect1, std1 = self._eval_util(y_train1, t_train1) 80 | effect2, std2 = self._eval_util(y_train2, t_train2) 81 | 82 | stat, p_val = ttest_ind_from_stats(effect1, std1, total1, effect2, std2, total2) 83 | return stat, p_val 84 | 85 | def predict(self, x): 86 | 87 | def _predict(node: SigNode, observation): 88 | if node.is_leaf: 89 | return node.effect 90 | else: 91 | v = observation[node.col] 92 | if v >= node.value: 93 | branch = node.true_branch 94 | else: 95 | branch = node.false_branch 96 | 97 | return _predict(branch, observation) 98 | 99 | if len(x.shape) == 1: 100 | prediction = _predict(self.root, x) 101 | return prediction 102 | 103 | num_test = x.shape[0] 104 | 105 | prediction = np.zeros(num_test) 106 | 107 | for i in range(num_test): 108 | test_example = x[i, :] 109 | prediction[i] = _predict(self.root, test_example) 110 | 111 | return prediction 112 | 113 | def get_groups(self, x): 114 | 115 | def _get_group(node: SigNode, observation): 116 | if node.is_leaf: 117 | return node.leaf_num 118 | else: 119 | v = observation[node.col] 120 | if v >= node.value: 121 | branch = node.true_branch 122 | else: 123 | branch = node.false_branch 124 | 125 | return _get_group(branch, observation) 126 | 127 | if len(x.shape) == 1: 128 | return _get_group(self.root, x) 129 | num_test = x.shape[0] 130 | leaf_results = np.zeros(num_test) 131 | 132 | for i in range(num_test): 133 | test_example = x[i, :] 134 | leaf_results[i] = _get_group(self.root, test_example) 135 | 136 | return leaf_results 137 | 138 | def get_features(self, x): 139 | 140 | def _get_features(node: SigNode, observation, features): 141 | if node.is_leaf: 142 | return features 143 | else: 144 | v = observation[node.col] 145 | if v >= node.value: 146 | branch = node.true_branch 147 | else: 148 | branch = node.false_branch 149 | 150 | features.append(node.decision) 151 | return _get_features(branch, observation, features) 152 | 153 | if len(x.shape) == 1: 154 | features = [] 155 | return _get_features(self.root, x, features) 156 | num_test = x.shape[0] 157 | leaf_features = [] 158 | 159 | for i in range(num_test): 160 | features = [] 161 | test_example = x[i, :] 162 | leaf_features.append(_get_features(self.root, test_example, features)) 163 | 164 | return leaf_features 165 | 166 | def prune(self, alpha=0.05): 167 | 168 | def _prune(node: SigNode): 169 | if node.true_branch is None or node.false_branch is None: 170 | return 171 | 172 | # recursive call for each branch 173 | if not node.true_branch.is_leaf: 174 | _prune(node.true_branch) 175 | if not node.false_branch.is_leaf: 176 | _prune(node.false_branch) 177 | 178 | # merge leaves (potentially) 179 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 180 | # Get branches 181 | tb = node.true_branch 182 | fb = node.false_branch 183 | 184 | tb_pval = tb.p_val 185 | fb_pval = fb.p_val 186 | 187 | if tb_pval > alpha and fb_pval > alpha: 188 | node.leaf_num = node.true_branch.leaf_num 189 | node.true_branch = None 190 | node.false_branch = None 191 | self.num_leaves = self.num_leaves - 1 192 | node.is_leaf = True 193 | 194 | # ---------------------------------------------------------------- 195 | # Something about obj/mse? 
if that is added 196 | # 197 | # - can do a self function so that tree references itself/it's own type of node? 198 | # ---------------------------------------------------------------- 199 | if tb.node_depth == self.tree_depth: 200 | self.tree_depth = self.tree_depth - 1 201 | 202 | _prune(self.root) 203 | 204 | def get_triggers(self, x): 205 | pass 206 | 207 | def save(self, filename): 208 | import pickle as pkl 209 | 210 | check_dir(filename) 211 | with open(filename, "wb") as file: 212 | pkl.dump(self, file) -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/sig_diff/sig_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.sig_diff.sig import * 2 | 3 | 4 | class BaseCausalTreeLearnNode(SigNode): 5 | 6 | def __init__(self, **kwargs): 7 | super().__init__(**kwargs) 8 | 9 | 10 | class SigTreeBase(SigTree): 11 | 12 | def __init__(self, **kwargs): 13 | super().__init__(**kwargs) 14 | self.root = BaseCausalTreeLearnNode() 15 | 16 | def fit(self, x, y, t): 17 | if x.shape[0] == 0: 18 | return 0 19 | 20 | # ---------------------------------------------------------------- 21 | # Seed 22 | # ---------------------------------------------------------------- 23 | np.random.seed(self.seed) 24 | 25 | train_x, train_y, train_t = x, y, t 26 | self.root.num_samples = train_y.shape[0] 27 | # ---------------------------------------------------------------- 28 | # effect and pvals 29 | # ---------------------------------------------------------------- 30 | effect = tau_squared(y, t) 31 | p_val = get_pval(y, t) 32 | self.root.effect = effect 33 | self.root.p_val = p_val 34 | 35 | self.root.obj = 0 36 | 37 | # ---------------------------------------------------------------- 38 | # Add control/treatment means 39 | # ---------------------------------------------------------------- 40 | self.root.control_mean = np.mean(y[t == 0]) 41 | self.root.treatment_mean = np.mean(y[t == 1]) 42 | 43 | self.root.num_samples = x.shape[0] 44 | 45 | self._fit(self.root, train_x, train_y, train_t) 46 | 47 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t): 48 | 49 | if train_x.shape[0] == 0: 50 | return node 51 | 52 | if node.node_depth > self.tree_depth: 53 | self.tree_depth = node.node_depth 54 | 55 | if self.max_depth == self.tree_depth: 56 | if node.effect > self.max_effect: 57 | self.max_effect = node.effect 58 | if node.effect < self.min_effect: 59 | self.min_effect = node.effect 60 | self.num_leaves += 1 61 | node.leaf_num = self.num_leaves 62 | node.is_leaf = True 63 | return node 64 | 65 | best_gain = 1.0 66 | best_attributes = [] 67 | best_tb_obj, best_fb_obj = (0.0, 0.0) 68 | 69 | column_count = train_x.shape[1] 70 | for col in range(0, column_count): 71 | unique_vals = np.unique(train_x[:, col]) 72 | 73 | if self.max_values is not None: 74 | if self.max_values < 1: 75 | idx = np.round(np.linspace( 76 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 77 | unique_vals = unique_vals[idx] 78 | else: 79 | idx = np.round(np.linspace( 80 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 81 | unique_vals = unique_vals[idx] 82 | 83 | for value in unique_vals: 84 | 85 | # check training data size 86 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 87 | = divide_set(train_x, train_y, train_t, col, value) 88 | check1 = check_min_size(self.min_size, train_t1) 89 | check2 = check_min_size(self.min_size, train_t2) 
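# ----------------------------------------------------------------
# Split criterion in this tree: instead of maximizing a CTL objective,
# each candidate split is scored by the p-value of a two-sample t-test
# comparing the two branches' effect estimates (SigTree._eval builds it
# from each branch's effect and spread):
#
#     from scipy.stats import ttest_ind_from_stats
#     stat, p = ttest_ind_from_stats(effect1, std1, n1, effect2, std2, n2)
#
# The split with the smallest p-value is kept only if it clears
# self.alpha. Review note: SigTree.__init__ in sig.py hard-codes
# self.alpha = 0.05 instead of self.alpha = alpha, so the alpha argument
# passed to the constructor is currently ignored.
# ----------------------------------------------------------------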
90 | if check1 or check2: 91 | continue 92 | 93 | t_stat, diff_pval = self._eval(train_y1, train_t1, train_y2, train_t2) 94 | 95 | gain = diff_pval 96 | 97 | if gain < best_gain and gain <= self.alpha: 98 | best_gain = gain 99 | best_attributes = [col, value] 100 | 101 | if best_gain <= self.alpha: 102 | node.col = best_attributes[0] 103 | node.value = best_attributes[1] 104 | 105 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 106 | = divide_set(train_x, train_y, train_t, node.col, node.value) 107 | 108 | y1 = train_y1 109 | y2 = train_y2 110 | t1 = train_t1 111 | t2 = train_t2 112 | 113 | best_tb_effect = ace(y1, t1) 114 | best_fb_effect = ace(y2, t2) 115 | tb_p_val = get_pval(y1, t1) 116 | fb_p_val = get_pval(y2, t2) 117 | 118 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 119 | 120 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 121 | node_depth=node.node_depth + 1, 122 | num_samples=y1.shape[0]) 123 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 124 | node_depth=node.node_depth + 1, 125 | num_samples=y2.shape[0]) 126 | 127 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 128 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 129 | 130 | if node.effect > self.max_effect: 131 | self.max_effect = node.effect 132 | if node.effect < self.min_effect: 133 | self.min_effect = node.effect 134 | 135 | return node 136 | 137 | else: 138 | if node.effect > self.max_effect: 139 | self.max_effect = node.effect 140 | if node.effect < self.min_effect: 141 | self.min_effect = node.effect 142 | 143 | self.num_leaves += 1 144 | node.leaf_num = self.num_leaves 145 | node.is_leaf = True 146 | return node 147 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import numpy as np 4 | from scipy.stats import ttest_ind 5 | import subprocess 6 | import time 7 | 8 | 9 | def check_dir(path): 10 | if not os.path.exists(os.path.dirname(path)): 11 | try: 12 | os.makedirs(os.path.dirname(path)) 13 | except OSError as exc: 14 | if exc.errno != errno.EEXIST: 15 | raise 16 | 17 | 18 | def divide_set(x, y, t, col, value): 19 | idx1 = x[:, col] >= value 20 | idx2 = ~idx1 21 | 22 | x1 = x[idx1] 23 | x2 = x[idx2] 24 | 25 | y1 = y[idx1] 26 | y2 = y[idx2] 27 | 28 | t1 = t[idx1] 29 | t2 = t[idx2] 30 | 31 | return x1, x2, y1, y2, t1, t2 32 | 33 | 34 | def tau_squared(y, t): 35 | total = y.shape[0] 36 | 37 | return_val = (-np.inf, -np.inf) 38 | 39 | if total == 0: 40 | return return_val 41 | 42 | treat_vect = t 43 | 44 | effect = ace(y, treat_vect) 45 | err = (effect ** 2) * total 46 | 47 | return effect 48 | 49 | 50 | def tau_squared_trigger(outcome, treatment, min_size=1, quartile=False): 51 | """Continuous case""" 52 | total = outcome.shape[0] 53 | 54 | return_val = (-np.inf, -np.inf) 55 | 56 | if total == 0: 57 | return return_val 58 | 59 | unique_treatment = np.unique(treatment) 60 | 61 | if unique_treatment.shape[0] == 1: 62 | return return_val 63 | 64 | unique_treatment = (unique_treatment[1:] + unique_treatment[:-1]) / 2 65 | unique_treatment = unique_treatment[1:-1] 66 | 67 | if quartile: 68 | first_quartile = int(np.floor(unique_treatment.shape[0] / 4)) 69 | third_quartile = int(np.ceil(3 * unique_treatment.shape[0] / 4)) 70 | 71 | unique_treatment = 
unique_treatment[first_quartile:third_quartile] 72 | 73 | yy = np.tile(outcome, (unique_treatment.shape[0], 1)) 74 | tt = np.tile(treatment, (unique_treatment.shape[0], 1)) 75 | 76 | x = np.transpose(np.transpose(tt) > unique_treatment) 77 | 78 | tt[x] = 1 79 | tt[np.logical_not(x)] = 0 80 | 81 | treat_num = np.sum(tt == 1, axis=1) 82 | cont_num = np.sum(tt == 0, axis=1) 83 | min_size_idx = np.where(np.logical_and( 84 | treat_num >= min_size, cont_num >= min_size)) 85 | 86 | unique_treatment = unique_treatment[min_size_idx] 87 | tt = tt[min_size_idx] 88 | yy = yy[min_size_idx] 89 | 90 | if tt.shape[0] == 0: 91 | return return_val 92 | 93 | y_t_m = np.sum((yy * (tt == 1)), axis=1) / np.sum(tt == 1, axis=1) 94 | y_c_m = np.sum((yy * (tt == 0)), axis=1) / np.sum(tt == 0, axis=1) 95 | 96 | effect = y_t_m - y_c_m 97 | err = effect ** 2 98 | 99 | max_err = np.argmax(err) 100 | 101 | best_effect = effect[max_err] 102 | best_err = err[max_err] 103 | best_split = unique_treatment[max_err] 104 | 105 | best_err = total * best_err 106 | 107 | return best_effect, best_split 108 | 109 | 110 | def ace(y, t): 111 | treat = t >= 0.5 112 | # control = t == 0 113 | control = ~treat 114 | 115 | yt = y[treat] 116 | yc = y[control] 117 | 118 | mu1 = 0.0 119 | mu0 = 0.0 120 | if yt.shape[0] != 0: 121 | mu1 = np.mean(yt) 122 | if yc.shape[0] != 0: 123 | mu0 = np.mean(yc) 124 | 125 | return mu1 - mu0 126 | 127 | 128 | def ace_trigger(y, t, trigger): 129 | treat = t >= trigger 130 | control = ~treat 131 | 132 | yt = y[treat] 133 | yc = y[control] 134 | 135 | mu1 = 0.0 136 | mu0 = 0.0 137 | if yt.shape[0] != 0: 138 | mu1 = np.mean(yt) 139 | if yc.shape[0] != 0: 140 | mu0 = np.mean(yc) 141 | 142 | return mu1 - mu0 143 | 144 | 145 | def get_pval(y, t): 146 | treat = t == 1 147 | # control = t == 0 148 | control = ~treat 149 | 150 | outcome_trt = y[treat] 151 | outcome_cont = y[control] 152 | 153 | p_val = ttest_ind(outcome_trt, outcome_cont)[1] 154 | 155 | if np.isnan(p_val): 156 | return 0.000 157 | 158 | return p_val 159 | 160 | 161 | def get_pval_trigger(y, t, trigger): 162 | treat = t >= trigger 163 | control = ~treat 164 | 165 | outcome_trt = y[treat] 166 | outcome_cont = y[control] 167 | 168 | p_val = ttest_ind(outcome_trt, outcome_cont)[1] 169 | 170 | if np.isnan(p_val): 171 | return 0.000 172 | 173 | return p_val 174 | 175 | 176 | def min_size_value_bool(min_size, t, trigger=0.5): 177 | nt, nc = get_treat_size(t, trigger=trigger) 178 | 179 | return nt, nc, nt < min_size or nc < min_size 180 | 181 | 182 | def check_min_size(min_size, t, trigger=0.5): 183 | nt, nc = get_treat_size(t, trigger) 184 | 185 | return nt < min_size or nc < min_size 186 | 187 | 188 | def get_treat_size(t, trigger=0.5): 189 | treated = t >= trigger 190 | control = ~treated 191 | num_treatment = t[treated].shape[0] 192 | num_control = t[control].shape[0] 193 | 194 | return num_treatment, num_control 195 | 196 | 197 | def variance(y, t): 198 | treat_vect = t 199 | 200 | treat = treat_vect == 1 201 | # control = treat_vect == 0 202 | control = ~treat 203 | 204 | if y.shape[0] == 0: 205 | return np.array([np.inf, np.inf]) 206 | 207 | yt = y[treat] 208 | yc = y[control] 209 | 210 | if yt.shape[0] == 0: 211 | var_t = np.var(y) 212 | else: 213 | var_t = np.var(yt) 214 | 215 | if yc.shape[0] == 0: 216 | var_c = np.var(y) 217 | else: 218 | var_c = np.var(yc) 219 | 220 | return var_t, var_c 221 | 222 | 223 | def variance_trigger(y, t, trigger): 224 | treat_vect = t 225 | 226 | treat = treat_vect >= trigger 227 | # control = treat_vect == 0 228 | 
control = ~treat 229 | 230 | if y.shape[0] == 0: 231 | return np.array([np.inf, np.inf]) 232 | 233 | yt = y[treat] 234 | yc = y[control] 235 | 236 | if yt.shape[0] == 0: 237 | var_t = np.var(y) 238 | else: 239 | var_t = np.var(yt) 240 | 241 | if yc.shape[0] == 0: 242 | var_c = np.var(y) 243 | else: 244 | var_c = np.var(yc) 245 | 246 | return var_t, var_c 247 | 248 | 249 | def col_dict(names): 250 | feat_names = {} 251 | for i, name in enumerate(names): 252 | column = "Column %s" % i 253 | feat_names[column] = name 254 | return feat_names 255 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util_c.cpython-310-darwin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util_c.cpython-310-darwin.so -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/pehe_tree.py: -------------------------------------------------------------------------------- 1 | from CTL._tree import _CausalTree 2 | from CTL.causal_tree.nn_pehe.base import * 3 | from CTL.causal_tree.nn_pehe.val import * 4 | from CTL.causal_tree.nn_pehe.honest import * 5 | from CTL.causal_tree.nn_pehe.balance_split import * 6 | 7 | 8 | class PEHETree(_CausalTree): 9 | 10 | def __init__(self, min_size=2, max_depth=-1, k=1, 11 | val=False, split_size=0.5, 12 | honest=False, 13 | use_propensity=False, propensity_model=None, 14 | balance=False, 15 | seed=724): 16 | super().__init__() 17 | 18 | params = { 19 | "min_size": min_size, 20 | "max_depth": max_depth, 21 | "k": k, 22 | "seed": seed, 23 | "split_size": split_size, 24 | "use_propensity": use_propensity, 25 | "propensity_model": propensity_model 26 | } 27 | if val: 28 | self.tree = ValPEHE(**params) 29 | elif honest: 30 | self.tree = HonestPEHE(**params) 31 | elif balance: 32 | self.tree = BalanceBasePEHE(**params) 33 | else: 34 | self.tree = BasePEHE(**params) 35 | 36 | self.column_num = 0 37 | self.fitted = False 38 | self.tree_depth = 0 39 | 40 | self.obj = 0 41 | self.pehe = 0 42 | 43 | def fit(self, x, y, t): 44 | self.column_num = x.shape[1] 45 | x = x.astype(np.float64) 46 | y = y.astype(np.float64) 47 | t = t.astype(np.float64) 48 | self.tree.fit(x, y, t) 49 | self.fitted = True 50 | self.tree_depth = self.tree.tree_depth 51 | self.obj = self.tree.obj 52 | self.pehe = self.tree.pehe 53 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/sig_diff_tree.py: -------------------------------------------------------------------------------- 1 | from CTL._tree import _CausalTree 2 | from CTL.causal_tree.sig_diff.sig_base import SigTreeBase 3 | from CTL.causal_tree.sig_diff.sig_val import SigTreeVal 4 | import numpy as np 5 | 6 | 7 | class SigDiffTree(_CausalTree): 8 | 9 | def __init__(self, alpha=0.05, min_size=2, max_depth=-1, val=False, split_size=0.5, seed=724): 10 | super().__init__() 11 | 12 | params = { 13 | "alpha": alpha, 14 | "min_size": min_size, 15 | "max_depth": max_depth, 16 | "seed": seed, 17 | } 18 | if val: 19 | params["split_size"] = split_size 20 | self.tree = SigTreeVal(**params) 21 | else: 22 | self.tree = SigTreeBase(**params) 23 | 24 | self.column_num = 0 25 | self.fitted = False 26 | self.tree_depth = 0 27 | 28 | self.obj = 0 29 | 30 | def fit(self, x, y, t): 
31 | self.column_num = x.shape[1] 32 | x = x.astype(np.float64) 33 | y = y.astype(np.float64) 34 | t = t.astype(np.float64) 35 | self.tree.fit(x, y, t) 36 | self.fitted = True 37 | self.tree_depth = self.tree.tree_depth 38 | self.obj = self.tree.obj 39 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/tree.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class Node(ABC): 5 | 6 | def __init__(self): 7 | self.is_leaf = False 8 | 9 | 10 | class Tree(ABC): 11 | 12 | def __init__(self): 13 | pass 14 | 15 | @abstractmethod 16 | def fit(self, x, y, t): 17 | pass 18 | 19 | @abstractmethod 20 | def predict(self, x): 21 | pass 22 | -------------------------------------------------------------------------------- /build/temp.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util_c.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/temp.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util_c.o -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: causal-tree-learn 3 | Version: 2.43 4 | Summary: Python implementation of causal trees with validation 5 | Home-page: https://github.com/edgeslab/CTL 6 | Author: Christopher Tran 7 | Author-email: ctran29@uic.edu 8 | Classifier: Programming Language :: Python :: 3 9 | Classifier: License :: OSI Approved :: MIT License 10 | Classifier: Operating System :: OS Independent 11 | Requires-Python: >=3.6 12 | Description-Content-Type: text/markdown 13 | License-File: LICENSE 14 | 15 | # CTL 16 | 17 | Christopher Tran, Elena Zheleva, ["Learning Triggers for Heterogeneous Treatment Effects", AAAI 2019.](https://arxiv.org/pdf/1902.00087.pdf) 18 | 19 | Our method is based on and adapted from: https://github.com/susanathey/causalTree 20 | 21 | 22 | ## Requirements 23 | * Python 3 24 | * scikit-learn 25 | * scipy 26 | * graphviz (if you want to plot the tree) 27 | 28 | ## Installation 29 | 30 | Install through pip: 31 | 32 | ```bash 33 | pip install causal_tree_learn 34 | ``` 35 | 36 | or clone the repository and build the extension in place: 37 | ```bash 38 | python setup.py build_ext --inplace 39 | ``` 40 | 41 | ## Demo Code 42 | 43 | Two demo scripts are available:
44 | 45 | ```bash 46 | python binary_example.py 47 | ``` 48 | Runs the tree on a binary-treatment example (asthma.txt). 49 | 50 | ```bash 51 | python trigger_example.py 52 | ``` 53 | Runs a tree on a trigger problem, where the treatment is continuous. (Note: the example data are currently synthetic and the treatment does not affect the outcome; the script only demonstrates the API.) 54 | -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | README.md 3 | pyproject.toml 4 | setup.py 5 | CTL/__init__.py 6 | CTL/_tree.py 7 | CTL/causal_learn_forest.py 8 | CTL/causal_tree_learn.py 9 | CTL/causal_tree_match.py 10 | CTL/pehe_tree.py 11 | CTL/sig_diff_tree.py 12 | CTL/tree.py 13 | CTL/causal_tree/__init__.py 14 | CTL/causal_tree/ct.py 15 | CTL/causal_tree/util.py 16 | CTL/causal_tree/util_c.c 17 | CTL/causal_tree/util_c.pyx 18 | CTL/causal_tree/ctl/__init__.py 19 | CTL/causal_tree/ctl/adaptive.py 20 | CTL/causal_tree/ctl/binary_ctl.py 21 | CTL/causal_tree/ctl/ctl_base.py 22 | CTL/causal_tree/ctl/ctl_honest.py 23 | CTL/causal_tree/ctl/ctl_val_honest.py 24 | CTL/causal_tree/ctl/honest.py 25 | CTL/causal_tree/ctl_match/__init__.py 26 | CTL/causal_tree/ctl_match/binary_ctl.py 27 | CTL/causal_tree/ctl_match/ctl_base.py 28 | CTL/causal_tree/ctl_trigger/__init__.py 29 | CTL/causal_tree/ctl_trigger/adaptive_trigger.py 30 | CTL/causal_tree/ctl_trigger/ctl_base_trigger.py 31 | CTL/causal_tree/ctl_trigger/ctl_honest_trigger.py 32 | CTL/causal_tree/ctl_trigger/ctl_val_honest_trigger.py 33 | CTL/causal_tree/ctl_trigger/honest_trigger.py 34 | CTL/causal_tree/ctl_trigger/trigger_ctl.py 35 | CTL/causal_tree/nn_pehe/__init__.py 36 | CTL/causal_tree/nn_pehe/balance_split.py 37 | CTL/causal_tree/nn_pehe/base.py 38 | CTL/causal_tree/nn_pehe/honest.py 39 | CTL/causal_tree/nn_pehe/tree.py 40 | CTL/causal_tree/nn_pehe/val.py 41 | CTL/causal_tree/r_tree/__init__.py 42 | CTL/causal_tree/r_tree/base.py 43 | CTL/causal_tree/r_tree/tree.py 44 | CTL/causal_tree/sig_diff/__init__.py 45 | CTL/causal_tree/sig_diff/sig.py 46 | CTL/causal_tree/sig_diff/sig_base.py 47 | CTL/causal_tree/sig_diff/sig_val.py 48 | causal_tree_learn.egg-info/PKG-INFO 49 | causal_tree_learn.egg-info/SOURCES.txt 50 | causal_tree_learn.egg-info/dependency_links.txt 51 | causal_tree_learn.egg-info/requires.txt 52 | causal_tree_learn.egg-info/top_level.txt -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scikit-learn 3 | scipy 4 | -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | CTL 2 | -------------------------------------------------------------------------------- /dist/causal-tree-learn-2.43.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/dist/causal-tree-learn-2.43.tar.gz --------------------------------------------------------------------------------
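To complement the demo scripts listed in the README above, here is a minimal sketch of the binary-treatment workflow. It assumes `binary_example.py` uses the same `CausalTree` fit/predict API as `trigger_example.py` (reproduced at the end of this dump); the synthetic arrays and the default constructor arguments are assumptions standing in for the asthma.txt data.

```python
# A hedged sketch of the binary-treatment API, not a copy of binary_example.py.
from CTL.causal_tree_learn import CausalTree
from sklearn.model_selection import train_test_split
import numpy as np

np.random.seed(0)

x = np.random.randn(200, 10)            # features
t = np.random.randint(0, 2, 200)        # binary treatment assignment (0/1)
y = np.random.randn(200) + 0.5 * t      # outcome with a small additive effect

x_train, x_test, y_train, y_test, t_train, t_test = train_test_split(
    x, y, t, test_size=0.5, random_state=42)

ct = CausalTree()                       # default (adaptive) settings assumed
ct.fit(x_train, y_train, t_train)
effects = ct.predict(x_test)            # estimated treatment effect per example
print(effects[:5])
```

As in the trigger demo, `honest=True` or `val_honest=True` can presumably be passed to the constructor to get the honest variants; only the `cont=True` flag is specific to the continuous-treatment case.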
/dist/causal_tree_learn-2.43-cp310-cp310-macosx_12_0_arm64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/dist/causal_tree_learn-2.43-cp310-cp310-macosx_12_0_arm64.whl -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "causal-tree-learn" 3 | version = "2.42" 4 | description = "" 5 | authors = ["Christopher Tran <ctran29@uic.edu>"] 6 | license = "License :: OSI Approved :: MIT License" 7 | 8 | [tool.poetry.dependencies] 9 | python = ">=3.8,<3.12" 10 | numpy = "^1.23.3" 11 | scikit-learn = "^1.1.2" 12 | scipy = "^1.9.2" 13 | Cython = "^0.29.32" 14 | twine = "^4.0.1" 15 | 16 | [tool.poetry.dev-dependencies] 17 | 18 | [build-system] 19 | requires = ["poetry-core>=1.0.0"] 20 | build-backend = "poetry.core.masonry.api" 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # from setuptools import setup 2 | from setuptools import find_packages 3 | from distutils.core import setup 4 | from distutils.extension import Extension 5 | import numpy as np 6 | 7 | # guard the Cython imports so the fallback build from the shipped .c file works 8 | try: 9 | from Cython.Distutils import build_ext 10 | from Cython.Build import cythonize 11 | except ImportError: 12 | use_cython = False 13 | else: 14 | use_cython = True 15 | 16 | with open("README.md", "r") as fh: 17 | long_description = fh.read() 18 | 19 | cmdclass = {} 20 | ext_modules = [] 21 | 22 | if use_cython: 23 | ext_modules = [ 24 | Extension(name="CTL.causal_tree.util_c", sources=["CTL/causal_tree/util_c.pyx"], 25 | include_dirs=[np.get_include(), "."]), 26 | ] 27 | cmdclass.update({'build_ext': build_ext}) 28 | else: 29 | # ext_modules = [ 30 | # Extension(name="CTL.causal_tree.util_c", sources=["CTL/causal_tree/util_c.pyx", "CTL/causal_tree/util_c.c"], 31 | # include_dirs=[np.get_include(), "."]), 32 | # ] 33 | ext_modules = [ 34 | Extension(name="CTL.causal_tree.util_c", sources=["CTL/causal_tree/util_c.c"], 35 | include_dirs=[np.get_include(), "."]), 36 | ] 37 | 38 | 39 | setup( 40 | name="causal-tree-learn", 41 | version="2.43", 42 | author="Christopher Tran", 43 | author_email="ctran29@uic.edu", 44 | description="Python implementation of causal trees with validation", 45 | long_description=long_description, 46 | long_description_content_type="text/markdown", 47 | url="https://github.com/edgeslab/CTL", 48 | packages=find_packages(), 49 | classifiers=[ 50 | "Programming Language :: Python :: 3", 51 | "License :: OSI Approved :: MIT License", 52 | "Operating System :: OS Independent", 53 | ], 54 | install_requires=['numpy', 55 | 'scikit-learn', 56 | 'scipy' 57 | ], 58 | python_requires='>=3.6', 59 | ext_modules=cythonize(ext_modules) if use_cython else ext_modules, 60 | # cmdclass={'build_ext': build_ext}, 61 | cmdclass=cmdclass, 62 | setup_requires=["cython", "numpy"], 63 | package_data={"CTL.causal_tree": ["util_c.c", "util_c.pyx"]} 64 | ) 65 | -------------------------------------------------------------------------------- /trigger_example.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree_learn import CausalTree 2 | from sklearn.model_selection import train_test_split 3 | import numpy as np 4 | 5 | np.random.seed(0) 6 | 7 | x = np.random.randn(100, 10) 8 | y = 
np.random.randn(100) 9 | treatment = np.random.randn(100) 10 | 11 | x_train, x_test, y_train, y_test, treat_train, treat_test = train_test_split(x, y, treatment, 12 | test_size=0.5, random_state=42) 13 | 14 | variable_names = [] 15 | for i in range(x.shape[1]): 16 | variable_names.append(f"Column {i}") 17 | 18 | # regular CTL 19 | ctl = CausalTree(cont=True) 20 | ctl.fit(x_train, y_train, treat_train) 21 | ctl_predict = ctl.predict(x_test) 22 | 23 | # honest CTL 24 | cth = CausalTree(cont=True, honest=True) 25 | cth.fit(x_train, y_train, treat_train) 26 | cth_predict = cth.predict(x_test) 27 | 28 | # val honest CTL 29 | cthv = CausalTree(cont=True, val_honest=True) 30 | cthv.fit(x_train, y_train, treat_train) 31 | cthv_predict = cthv.predict(x_test) 32 | 33 | # adaptive CT 34 | ct_adaptive = CausalTree(weight=0.0, split_size=0.0, cont=True) 35 | ct_adaptive.fit(x_train, y_train, treat_train) 36 | ct_adaptive_predict = ct_adaptive.predict(x_test) 37 | 38 | # honest CT 39 | ct_honest = CausalTree(honest=True, weight=0.0, split_size=0.0, cont=True) 40 | ct_honest.fit(x_train, y_train, treat_train) 41 | ct_honest_predict = ct_honest.predict(x_test) 42 | 43 | # to get which examples are in which leaf 44 | groups = cthv.get_groups(x_test) 45 | 46 | # to get triggers 47 | triggers = cthv.get_triggers(x_test) 48 | print(triggers) 49 | 50 | # to get features used, input the columns 51 | features_used = cthv.get_variables_used(variable_names) 52 | print(features_used) 53 | 54 | # to get the decision for every example 55 | features = cthv.get_features(x) 56 | print(features) 57 | 58 | # if you want to plot a tree 59 | cthv.plot_tree(filename="output/trigger_tree") 60 | 61 | 62 | --------------------------------------------------------------------------------
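A closing usage note: the tree classes expose the `save(filename)` method shown in the pruning code earlier in this dump, which pickles the entire fitted object (creating the output directory via `check_dir`). Loading back is plain pickle. The sketch below continues from `trigger_example.py` above; the `.pkl` filename is illustrative, not one the repo itself creates.

```python
# Round-tripping a fitted tree via pickle, continuing from trigger_example.py.
# save() pickles the whole object, so the standard pickle.load recovers it.
import pickle

cthv.save("output/trigger_tree.pkl")   # creates output/ if it does not exist

with open("output/trigger_tree.pkl", "rb") as f:
    loaded = pickle.load(f)

# the restored tree should predict identically to the original
print(np.allclose(loaded.predict(x_test), cthv.predict(x_test)))
```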