├── .gitignore
├── CTL
│   ├── __init__.py
│   ├── _tree.py
│   ├── causal_learn_forest.py
│   ├── causal_tree
│   │   ├── __init__.py
│   │   ├── ct.py
│   │   ├── ctl
│   │   │   ├── __init__.py
│   │   │   ├── adaptive.py
│   │   │   ├── binary_ctl.py
│   │   │   ├── ctl_base.py
│   │   │   ├── ctl_honest.py
│   │   │   ├── ctl_val_honest.py
│   │   │   └── honest.py
│   │   ├── ctl_match
│   │   │   ├── __init__.py
│   │   │   ├── binary_ctl.py
│   │   │   └── ctl_base.py
│   │   ├── ctl_trigger
│   │   │   ├── __init__.py
│   │   │   ├── adaptive_trigger.py
│   │   │   ├── ctl_base_trigger.py
│   │   │   ├── ctl_honest_trigger.py
│   │   │   ├── ctl_val_honest_trigger.py
│   │   │   ├── honest_trigger.py
│   │   │   └── trigger_ctl.py
│   │   ├── nn_pehe
│   │   │   ├── __init__.py
│   │   │   ├── balance_split.py
│   │   │   ├── base.py
│   │   │   ├── honest.py
│   │   │   ├── tree.py
│   │   │   └── val.py
│   │   ├── r_tree
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   └── tree.py
│   │   ├── sig_diff
│   │   │   ├── __init__.py
│   │   │   ├── sig.py
│   │   │   ├── sig_base.py
│   │   │   └── sig_val.py
│   │   ├── util.py
│   │   ├── util_c.c
│   │   ├── util_c.cpython-37m-darwin.so
│   │   └── util_c.pyx
│   ├── causal_tree_learn.py
│   ├── causal_tree_match.py
│   ├── pehe_tree.py
│   ├── sig_diff_tree.py
│   └── tree.py
├── LICENSE
├── README.md
├── binary_example.py
├── build
│   ├── lib.macosx-12.6-arm64-cpython-310
│   │   └── CTL
│   │       ├── __init__.py
│   │       ├── _tree.py
│   │       ├── causal_learn_forest.py
│   │       ├── causal_tree
│   │       │   ├── __init__.py
│   │       │   ├── ct.py
│   │       │   ├── ctl
│   │       │   │   ├── __init__.py
│   │       │   │   ├── adaptive.py
│   │       │   │   ├── binary_ctl.py
│   │       │   │   ├── ctl_base.py
│   │       │   │   ├── ctl_honest.py
│   │       │   │   ├── ctl_val_honest.py
│   │       │   │   └── honest.py
│   │       │   ├── ctl_match
│   │       │   │   ├── __init__.py
│   │       │   │   ├── binary_ctl.py
│   │       │   │   └── ctl_base.py
│   │       │   ├── ctl_trigger
│   │       │   │   ├── __init__.py
│   │       │   │   ├── adaptive_trigger.py
│   │       │   │   ├── ctl_base_trigger.py
│   │       │   │   ├── ctl_honest_trigger.py
│   │       │   │   ├── ctl_val_honest_trigger.py
│   │       │   │   ├── honest_trigger.py
│   │       │   │   └── trigger_ctl.py
│   │       │   ├── nn_pehe
│   │       │   │   ├── __init__.py
│   │       │   │   ├── balance_split.py
│   │       │   │   ├── base.py
│   │       │   │   ├── honest.py
│   │       │   │   ├── tree.py
│   │       │   │   └── val.py
│   │       │   ├── r_tree
│   │       │   │   ├── __init__.py
│   │       │   │   ├── base.py
│   │       │   │   └── tree.py
│   │       │   ├── sig_diff
│   │       │   │   ├── __init__.py
│   │       │   │   ├── sig.py
│   │       │   │   ├── sig_base.py
│   │       │   │   └── sig_val.py
│   │       │   ├── util.py
│   │       │   ├── util_c.c
│   │       │   ├── util_c.cpython-310-darwin.so
│   │       │   └── util_c.pyx
│   │       ├── causal_tree_learn.py
│   │       ├── causal_tree_match.py
│   │       ├── pehe_tree.py
│   │       ├── sig_diff_tree.py
│   │       └── tree.py
│   └── temp.macosx-12.6-arm64-cpython-310
│       └── CTL
│           └── causal_tree
│               └── util_c.o
├── causal_tree_learn.egg-info
│   ├── PKG-INFO
│   ├── SOURCES.txt
│   ├── dependency_links.txt
│   ├── requires.txt
│   └── top_level.txt
├── data
│   └── asthma.txt
├── dist
│   ├── causal-tree-learn-2.43.tar.gz
│   └── causal_tree_learn-2.43-cp310-cp310-macosx_12_0_arm64.whl
├── poetry.lock
├── pyproject.toml
├── setup.py
└── trigger_example.py

/.gitignore:
--------------------------------------------------------------------------------
1 | output/
2 | __pycache__/
3 | .DS_store
4 | .idea/
5 | # build/
6 | # causal_tree_learn.egg-info
7 | # dist
8 | /backup/
9 | notebooks/
10 | 
11 | test.py
12 | test_script.py
13 | test_cython_trigger.py
14 | test_cython_binary.py
15 | test_cython.py
16 | binary_example_random.py
17 | test_trigger.py
18 | notebooks/data_generation.py
19 | notebooks/2020-04-28 - Vectorize with Matching.ipynb
20 | test2.py
21 | test3.py
22 | 
23 | .ipynb_checkpoints
24 | 
25 | test.*
26 | /dist (1)/
27 | 
28 | notes.txt
29 | .venv
--------------------------------------------------------------------------------
/CTL/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/__init__.py
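A minimal usage sketch for the CausalTreeLearnForest class defined in causal_learn_forest.py just below; the synthetic data, variable names, and parameter values here are illustrative assumptions, not part of the repo:

import numpy as np
from CTL.causal_learn_forest import CausalTreeLearnForest

rng = np.random.RandomState(0)
x = rng.randn(500, 8)                      # 500 samples, 8 features
t = rng.randint(0, 2, 500)                 # binary treatment indicator
y = x[:, 0] + t * (x[:, 1] > 0) + 0.1 * rng.randn(500)

forest = CausalTreeLearnForest(num_trees=10, bootstrap=True, max_features="sqrt")
forest.fit(x, y, t)
effects = forest.predict(x)                # per-sample effect, averaged over trees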
--------------------------------------------------------------------------------
/CTL/causal_learn_forest.py:
--------------------------------------------------------------------------------
1 | from CTL.causal_tree_learn import CausalTree
2 | import numpy as np
3 | 
4 | 
5 | class CausalTreeLearnForest:
6 | 
7 |     def __init__(self, num_trees=10, bootstrap=True, max_samples=None, max_features="auto", max_depth=-1,
8 |                  val_honest=False, honest=False, min_size=2, split_size=0.5, weight=0.5, feature_batch_size=None,
9 |                  seed=724):
10 | 
11 |         tree_params = {
12 |             "weight": weight,
13 |             "split_size": split_size,
14 |             "max_depth": max_depth,
15 |             "seed": seed,
16 |             "min_size": min_size,
17 |             "val_honest": val_honest,
18 |             "honest": honest,
19 |             "feature_batch_size": feature_batch_size,
20 |         }
21 | 
22 |         self.num_trees = num_trees
23 |         self.bootstrap = bootstrap
24 |         self.max_samples = max_samples
25 |         self.max_features = max_features
26 |         self.max_depth = max_depth
27 | 
28 |         self.trees = tuple(CausalTree(**tree_params) for _ in range(num_trees))
29 | 
30 |     def fit(self, x, y, t):
31 |         x = x.astype(float)
32 |         y = y.astype(float)
33 |         t = t.astype(float)
34 | 
35 |         for tree in self.trees:
36 |             example_samples, feature_samples = self._sample(x)
37 | 
38 |             sample_x = x[np.ix_(example_samples, feature_samples)]
39 |             sample_y = y[example_samples]
40 |             sample_t = t[example_samples]
41 |             tree.feature_samples = feature_samples  # remember which columns this tree was fit on
42 |             tree.fit(sample_x, sample_y, sample_t)
43 | 
44 |     def predict(self, x):
45 |         predictions = np.zeros((self.num_trees, x.shape[0]))
46 |         for i, tree in enumerate(self.trees):
47 |             predictions[i] = tree.predict(x[:, tree.feature_samples])  # subset to the columns the tree saw
48 | 
49 |         return np.mean(predictions, axis=0)
50 | 
51 |     def _sample(self, x):
52 |         total_examples = x.shape[0]
53 |         total_features = x.shape[1]
54 | 
55 |         example_samples = self._sample_examples(total_examples)
56 |         feature_samples = self._feature_sample(total_features)
57 | 
58 |         return example_samples, feature_samples
59 | 
60 |     def _sample_examples(self, total_examples):
61 |         if self.bootstrap:
62 |             if self.max_samples:
63 |                 if isinstance(self.max_samples, float):
64 |                     example_samples = np.random.choice(np.arange(0, total_examples),
65 |                                                        size=int(self.max_samples * total_examples))
66 |                 elif isinstance(self.max_samples, int):
67 |                     example_samples = np.random.choice(np.arange(0, total_examples), size=self.max_samples)
68 |                 else:
69 |                     example_samples = np.random.choice(np.arange(0, total_examples), size=total_examples)
70 |             else:
71 |                 example_samples = np.random.choice(np.arange(0, total_examples), size=total_examples)
72 |         else:
73 |             example_samples = np.arange(0, total_examples)
74 | 
75 |         return example_samples
76 | 
77 |     def _feature_sample(self, total_features):
78 |         num_features = self._feature_sample_size(total_features)
79 |         feature_samples = np.random.permutation(total_features)[:num_features]
80 |         return feature_samples
81 | 
82 |     def _feature_sample_size(self, total_features):
83 |         num_features = total_features
84 |         if self.max_features == "auto" or self.max_features == "sqrt":
85 |             num_features = int(np.sqrt(num_features))
86 |         elif isinstance(self.max_features, int):
87 |             num_features = self.max_features
88 |         elif isinstance(self.max_features, float):
89 |             num_features = int(self.max_features * total_features)
90 |         return num_features
91 | 
--------------------------------------------------------------------------------
/CTL/causal_tree/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/ct.py: -------------------------------------------------------------------------------- 1 | from CTL.tree import * 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class CTNode(ABC): 6 | 7 | def __init__(self): 8 | super().__init__() 9 | 10 | 11 | class CausalTree(ABC): 12 | 13 | def __init__(self): 14 | super().__init__() 15 | 16 | # the learning objective 17 | self.obj = 0.0 18 | # Haven't implemented "mse" yet 19 | self.mse = 0.0 20 | 21 | # tree properties 22 | self.tree_depth = 0 23 | self.num_leaves = 0 24 | 25 | @abstractmethod 26 | def fit(self, x, y, t): 27 | pass 28 | 29 | @abstractmethod 30 | def predict(self, x): 31 | pass 32 | -------------------------------------------------------------------------------- /CTL/causal_tree/ctl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/ctl/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/ctl/adaptive.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class AdaptiveNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class AdaptiveTree(CTLearn): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = AdaptiveNode() 21 | 22 | def adaptive_eval(self, train_y, train_t): 23 | total_train = train_y.shape[0] 24 | 25 | train_effect = ace(train_y, train_t) 26 | 27 | train_mse = total_train * (train_effect ** 2) 28 | 29 | obj = train_mse 30 | mse = total_train * (train_effect ** 2) 31 | 32 | return obj, mse 33 | 34 | def fit(self, x, y, t): 35 | if x.shape[0] == 0: 36 | return 0 37 | 38 | # ---------------------------------------------------------------- 39 | # Seed 40 | # ---------------------------------------------------------------- 41 | np.random.seed(self.seed) 42 | 43 | # ---------------------------------------------------------------- 44 | # Verbosity? 
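        # (illustrative) adaptive_eval above scores a node as N * tau^2, so a
        # candidate split whose children have 60 and 40 samples with estimated
        # effects 0.5 and -0.2 scores 60 * 0.25 + 40 * 0.04 = 16.6; the gain
        # computed in _fit is that sum minus the parent's own score.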
45 | # ---------------------------------------------------------------- 46 | 47 | # ---------------------------------------------------------------- 48 | # Split data 49 | # ---------------------------------------------------------------- 50 | 51 | self.root.num_samples = y.shape[0] 52 | # ---------------------------------------------------------------- 53 | # effect and pvals 54 | # ---------------------------------------------------------------- 55 | effect = tau_squared(y, t) 56 | p_val = get_pval(y, t) 57 | self.root.effect = effect 58 | self.root.p_val = p_val 59 | 60 | # ---------------------------------------------------------------- 61 | # Not sure if i should eval in root or not 62 | # ---------------------------------------------------------------- 63 | node_eval, mse = self.adaptive_eval(y, t) 64 | self.root.obj = node_eval 65 | 66 | # ---------------------------------------------------------------- 67 | # Add control/treatment means 68 | # ---------------------------------------------------------------- 69 | self.root.control_mean = np.mean(y[t == 0]) 70 | self.root.treatment_mean = np.mean(y[t == 1]) 71 | 72 | self.root.num_samples = x.shape[0] 73 | 74 | self._fit(self.root, x, y, t) 75 | 76 | def _fit(self, node: AdaptiveNode, train_x, train_y, train_t): 77 | 78 | if train_x.shape[0] == 0: 79 | return node 80 | 81 | if node.node_depth > self.tree_depth: 82 | self.tree_depth = node.node_depth 83 | 84 | if self.max_depth == self.tree_depth: 85 | if node.effect > self.max_effect: 86 | self.max_effect = node.effect 87 | if node.effect < self.min_effect: 88 | self.min_effect = node.effect 89 | self.num_leaves += 1 90 | node.leaf_num = self.num_leaves 91 | node.is_leaf = True 92 | return node 93 | 94 | best_gain = 0.0 95 | best_attributes = [] 96 | best_tb_obj, best_fb_obj = (0.0, 0.0) 97 | 98 | column_count = train_x.shape[1] 99 | for col in range(0, column_count): 100 | unique_vals = np.unique(train_x[:, col]) 101 | 102 | if self.max_values is not None: 103 | if self.max_values < 1: 104 | idx = np.round(np.linspace( 105 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 106 | unique_vals = unique_vals[idx] 107 | else: 108 | idx = np.round(np.linspace( 109 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 110 | unique_vals = unique_vals[idx] 111 | 112 | for value in unique_vals: 113 | 114 | # check training data size 115 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 116 | = divide_set(train_x, train_y, train_t, col, value) 117 | check1 = check_min_size(self.min_size, train_t1) 118 | check2 = check_min_size(self.min_size, train_t2) 119 | if check1 or check2: 120 | continue 121 | 122 | tb_eval, tb_mse = self.adaptive_eval(train_y1, train_t1) 123 | fb_eval, fb_mse = self.adaptive_eval(train_y2, train_t2) 124 | 125 | split_eval = (tb_eval + fb_eval) 126 | gain = -node.obj + split_eval 127 | 128 | if gain > best_gain: 129 | best_gain = gain 130 | best_attributes = [col, value] 131 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 132 | 133 | if best_gain > 0: 134 | node.col = best_attributes[0] 135 | node.value = best_attributes[1] 136 | 137 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 138 | = divide_set(train_x, train_y, train_t, node.col, node.value) 139 | 140 | y1 = train_y1 141 | y2 = train_y2 142 | t1 = train_t1 143 | t2 = train_t2 144 | 145 | best_tb_effect = ace(y1, t1) 146 | best_fb_effect = ace(y2, t2) 147 | tb_p_val = get_pval(y1, t1) 148 | fb_p_val = get_pval(y2, t2) 149 | 150 | self.obj = self.obj - 
node.obj + best_tb_obj + best_fb_obj 151 | 152 | # ---------------------------------------------------------------- 153 | # Ignore "mse" here, come back to it later? 154 | # ---------------------------------------------------------------- 155 | 156 | tb = AdaptiveNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 157 | node_depth=node.node_depth + 1, 158 | num_samples=y1.shape[0]) 159 | fb = AdaptiveNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 160 | node_depth=node.node_depth + 1, 161 | num_samples=y2.shape[0]) 162 | 163 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 164 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 165 | 166 | if node.effect > self.max_effect: 167 | self.max_effect = node.effect 168 | if node.effect < self.min_effect: 169 | self.min_effect = node.effect 170 | 171 | return node 172 | 173 | else: 174 | if node.effect > self.max_effect: 175 | self.max_effect = node.effect 176 | if node.effect < self.min_effect: 177 | self.min_effect = node.effect 178 | 179 | self.num_leaves += 1 180 | node.leaf_num = self.num_leaves 181 | node.is_leaf = True 182 | return node 183 | -------------------------------------------------------------------------------- /CTL/causal_tree/ctl/ctl_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class BaseCausalTreeLearnNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class CausalTreeLearnBase(CTLearn): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = BaseCausalTreeLearnNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | # ---------------------------------------------------------------- 32 | # Verbosity? 
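        # (note) unlike the adaptive tree above, this CTL base tree holds out a
        # validation fraction (self.val_split, the constructor's split_size)
        # via train_test_split below, and _eval scores each candidate split on
        # both halves (see binary_ctl.py, which is not shown here).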
33 | # ---------------------------------------------------------------- 34 | 35 | # ---------------------------------------------------------------- 36 | # Split data 37 | # ---------------------------------------------------------------- 38 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 39 | test_size=self.val_split) 40 | self.root.num_samples = train_y.shape[0] 41 | # ---------------------------------------------------------------- 42 | # effect and pvals 43 | # ---------------------------------------------------------------- 44 | effect = tau_squared(y, t) 45 | p_val = get_pval(y, t) 46 | self.root.effect = effect 47 | self.root.p_val = p_val 48 | 49 | # ---------------------------------------------------------------- 50 | # Not sure if i should eval in root or not 51 | # ---------------------------------------------------------------- 52 | node_eval, mse = self._eval(train_y, train_t, val_y, val_t) 53 | self.root.obj = node_eval 54 | 55 | # ---------------------------------------------------------------- 56 | # Add control/treatment means 57 | # ---------------------------------------------------------------- 58 | self.root.control_mean = np.mean(y[t == 0]) 59 | self.root.treatment_mean = np.mean(y[t == 1]) 60 | 61 | self.root.num_samples = x.shape[0] 62 | 63 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 64 | 65 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t, val_x, val_y, val_t): 66 | 67 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 68 | return node 69 | 70 | if node.node_depth > self.tree_depth: 71 | self.tree_depth = node.node_depth 72 | 73 | if self.max_depth == self.tree_depth: 74 | if node.effect > self.max_effect: 75 | self.max_effect = node.effect 76 | if node.effect < self.min_effect: 77 | self.min_effect = node.effect 78 | self.num_leaves += 1 79 | node.leaf_num = self.num_leaves 80 | node.is_leaf = True 81 | return node 82 | 83 | best_gain = 0.0 84 | best_attributes = [] 85 | best_tb_obj, best_fb_obj = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | if self.max_values is not None: 92 | if self.max_values < 1: 93 | idx = np.round(np.linspace( 94 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 95 | unique_vals = unique_vals[idx] 96 | else: 97 | idx = np.round(np.linspace( 98 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 99 | unique_vals = unique_vals[idx] 100 | 101 | # using the faster evaluation with vector/matrix calculations 102 | try: 103 | if self.feature_batch_size is None: 104 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, val_y, 105 | val_t, 106 | unique_vals, col) 107 | gain = -node.obj + split_obj 108 | if gain > best_gain: 109 | best_gain = gain 110 | best_attributes = [col, value] 111 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 112 | else: 113 | 114 | for x in batch(unique_vals, self.feature_batch_size): 115 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, 116 | val_y, val_t, x, col) 117 | 118 | gain = -node.obj + split_obj 119 | if gain > best_gain: 120 | best_gain = gain 121 | best_attributes = [col, value] 122 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 123 | # if that fails (due to memory maybe?) 
then use the old calculation 124 | except: 125 | for value in unique_vals: 126 | 127 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 128 | = divide_set(val_x, val_y, val_t, col, value) 129 | 130 | # check validation set size 131 | val_size = self.val_split * self.min_size if self.val_split * self.min_size > 2 else 2 132 | if check_min_size(val_size, val_t1) or check_min_size(val_size, val_t2): 133 | continue 134 | 135 | # check training data size 136 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 137 | = divide_set(train_x, train_y, train_t, col, value) 138 | check1 = check_min_size(self.min_size, train_t1) 139 | check2 = check_min_size(self.min_size, train_t2) 140 | if check1 or check2: 141 | continue 142 | 143 | tb_eval, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 144 | fb_eval, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 145 | 146 | split_eval = (tb_eval + fb_eval) 147 | gain = -node.obj + split_eval 148 | 149 | if gain > best_gain: 150 | best_gain = gain 151 | best_attributes = [col, value] 152 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 153 | 154 | if best_gain > 0: 155 | node.col = best_attributes[0] 156 | node.value = best_attributes[1] 157 | 158 | # print(node.col) 159 | 160 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 161 | = divide_set(train_x, train_y, train_t, node.col, node.value) 162 | 163 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 164 | = divide_set(val_x, val_y, val_t, node.col, node.value) 165 | 166 | y1 = np.concatenate((train_y1, val_y1)) 167 | y2 = np.concatenate((train_y2, val_y2)) 168 | t1 = np.concatenate((train_t1, val_t1)) 169 | t2 = np.concatenate((train_t2, val_t2)) 170 | 171 | best_tb_effect = ace(y1, t1) 172 | best_fb_effect = ace(y2, t2) 173 | tb_p_val = get_pval(y1, t1) 174 | fb_p_val = get_pval(y2, t2) 175 | 176 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 177 | 178 | # ---------------------------------------------------------------- 179 | # Ignore "mse" here, come back to it later? 
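            # (note) self.obj above is maintained incrementally: each accepted
            # split removes the parent's score from the running total and adds
            # the two child scores in its place.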
180 | # ---------------------------------------------------------------- 181 | 182 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 183 | node_depth=node.node_depth + 1, 184 | num_samples=y1.shape[0]) 185 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 186 | node_depth=node.node_depth + 1, 187 | num_samples=y2.shape[0]) 188 | 189 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 190 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 191 | 192 | if node.effect > self.max_effect: 193 | self.max_effect = node.effect 194 | if node.effect < self.min_effect: 195 | self.min_effect = node.effect 196 | 197 | return node 198 | 199 | else: 200 | if node.effect > self.max_effect: 201 | self.max_effect = node.effect 202 | if node.effect < self.min_effect: 203 | self.min_effect = node.effect 204 | 205 | self.num_leaves += 1 206 | node.leaf_num = self.num_leaves 207 | node.is_leaf = True 208 | return node 209 | -------------------------------------------------------------------------------- /CTL/causal_tree/ctl_match/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/ctl_match/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/ctl_match/ctl_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_match.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class BaseCausalTreeLearnNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class CTLMatchBase(CTLMatch): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = BaseCausalTreeLearnNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | # ---------------------------------------------------------------- 32 | # Verbosity? 
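        # (note) this matching variant differs from ctl/ctl_base.py mainly in
        # that it fits self.normalizer on the training features (see the
        # "Split data" block below), presumably to put features on a comparable
        # scale for the matching step in ctl_match/binary_ctl.py (not shown here).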
33 | # ---------------------------------------------------------------- 34 | 35 | # ---------------------------------------------------------------- 36 | # Split data 37 | # ---------------------------------------------------------------- 38 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 39 | test_size=self.val_split) 40 | 41 | self.normalizer.fit(train_x) 42 | 43 | self.root.num_samples = y.shape[0] 44 | # ---------------------------------------------------------------- 45 | # effect and pvals 46 | # ---------------------------------------------------------------- 47 | effect = tau_squared(y, t) 48 | p_val = get_pval(y, t) 49 | self.root.effect = effect 50 | self.root.p_val = p_val 51 | 52 | # ---------------------------------------------------------------- 53 | # Not sure if i should eval in root or not 54 | # ---------------------------------------------------------------- 55 | node_eval, mse = self._eval(train_y, train_t, val_y, val_t) 56 | self.root.obj = node_eval 57 | 58 | # ---------------------------------------------------------------- 59 | # Add control/treatment means 60 | # ---------------------------------------------------------------- 61 | self.root.control_mean = np.mean(y[t == 0]) 62 | self.root.treatment_mean = np.mean(y[t == 1]) 63 | 64 | self.root.num_samples = x.shape[0] 65 | 66 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 67 | 68 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t, val_x, val_y, val_t): 69 | 70 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 71 | node.is_leaf = True 72 | return node 73 | 74 | if node.node_depth > self.tree_depth: 75 | self.tree_depth = node.node_depth 76 | 77 | if self.max_depth == self.tree_depth: 78 | self.num_leaves += 1 79 | node.leaf_num = self.num_leaves 80 | node.is_leaf = True 81 | return node 82 | 83 | best_gain = 0.0 84 | best_attributes = [] 85 | best_tb_obj, best_fb_obj = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | # ---------------------------------------------------------------- 92 | # TODO: Max values stuff 93 | # ---------------------------------------------------------------- 94 | 95 | # using the faster evaluation with vector/matrix calculations 96 | try: 97 | if self.feature_batch_size is None: 98 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, val_y, 99 | val_t, 100 | unique_vals, col) 101 | gain = -node.obj + split_obj 102 | if gain > best_gain: 103 | best_gain = gain 104 | best_attributes = [col, value] 105 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 106 | else: 107 | 108 | for x in batch(unique_vals, self.feature_batch_size): 109 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, 110 | val_y, val_t, x, col) 111 | 112 | gain = -node.obj + split_obj 113 | if gain > best_gain: 114 | best_gain = gain 115 | best_attributes = [col, value] 116 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 117 | # if that fails (due to memory maybe?) 
then use the old calculation 118 | except: 119 | for value in unique_vals: 120 | 121 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 122 | = divide_set(val_x, val_y, val_t, col, value) 123 | 124 | # check validation set size 125 | val_size = self.val_split * self.min_size if self.val_split * self.min_size > 2 else 2 126 | if check_min_size(val_size, val_t1) or check_min_size(val_size, val_t2): 127 | continue 128 | 129 | # check training data size 130 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 131 | = divide_set(train_x, train_y, train_t, col, value) 132 | check1 = check_min_size(self.min_size, train_t1) 133 | check2 = check_min_size(self.min_size, train_t2) 134 | if check1 or check2: 135 | continue 136 | 137 | tb_eval, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 138 | fb_eval, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 139 | 140 | split_eval = (tb_eval + fb_eval) 141 | gain = -node.obj + split_eval 142 | 143 | if gain > best_gain: 144 | best_gain = gain 145 | best_attributes = [col, value] 146 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 147 | 148 | if best_gain > 0: 149 | node.col = best_attributes[0] 150 | node.value = best_attributes[1] 151 | 152 | # print(node.col) 153 | 154 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 155 | = divide_set(train_x, train_y, train_t, node.col, node.value) 156 | 157 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 158 | = divide_set(val_x, val_y, val_t, node.col, node.value) 159 | 160 | y1 = np.concatenate((train_y1, val_y1)) 161 | y2 = np.concatenate((train_y2, val_y2)) 162 | t1 = np.concatenate((train_t1, val_t1)) 163 | t2 = np.concatenate((train_t2, val_t2)) 164 | 165 | best_tb_effect = ace(y1, t1) 166 | best_fb_effect = ace(y2, t2) 167 | tb_p_val = get_pval(y1, t1) 168 | fb_p_val = get_pval(y2, t2) 169 | 170 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 171 | 172 | # ---------------------------------------------------------------- 173 | # Ignore "mse" here, come back to it later? 
174 | # ---------------------------------------------------------------- 175 | 176 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 177 | node_depth=node.node_depth + 1, 178 | num_samples=y1.shape[0]) 179 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 180 | node_depth=node.node_depth + 1, 181 | num_samples=y2.shape[0]) 182 | 183 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 184 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 185 | 186 | if node.effect > self.max_effect: 187 | self.max_effect = node.effect 188 | if node.effect < self.min_effect: 189 | self.min_effect = node.effect 190 | 191 | return node 192 | 193 | else: 194 | if node.effect > self.max_effect: 195 | self.max_effect = node.effect 196 | if node.effect < self.min_effect: 197 | self.min_effect = node.effect 198 | 199 | self.num_leaves += 1 200 | node.leaf_num = self.num_leaves 201 | node.is_leaf = True 202 | return node 203 | -------------------------------------------------------------------------------- /CTL/causal_tree/ctl_trigger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/ctl_trigger/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/ctl_trigger/adaptive_trigger.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_trigger.trigger_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class AdaptiveTriggerNode(TriggerNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class AdaptiveTriggerTree(TriggerTree): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = AdaptiveTriggerNode() 21 | 22 | def adaptive_eval(self, train_y, train_t): 23 | 24 | total_train = train_y.shape[0] 25 | return_val = (-np.inf, -np.inf, -np.inf) 26 | 27 | if total_train == 0: 28 | return return_val 29 | 30 | train_effect, best_trigger = tau_squared_trigger(train_y, train_t, self.min_size, self.quartile) 31 | 32 | if train_effect <= -np.inf: 33 | return return_val 34 | 35 | train_err = train_effect ** 2 36 | 37 | train_mse = total_train * train_err 38 | obj = train_mse 39 | 40 | best_obj = obj 41 | best_mse = train_err 42 | 43 | return best_obj, best_trigger, best_mse 44 | 45 | def fit(self, x, y, t): 46 | if x.shape[0] == 0: 47 | return 0 48 | 49 | # ---------------------------------------------------------------- 50 | # Seed 51 | # ---------------------------------------------------------------- 52 | np.random.seed(self.seed) 53 | 54 | # ---------------------------------------------------------------- 55 | # Verbosity? 
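        # (note) in the trigger trees, t is a continuous exposure rather than a
        # 0/1 indicator: tau_squared_trigger below searches candidate thresholds
        # ("triggers"), and the best threshold is stored per node as node.trigger.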
56 | # ---------------------------------------------------------------- 57 | 58 | # ---------------------------------------------------------------- 59 | # Split data 60 | # ---------------------------------------------------------------- 61 | 62 | self.root.num_samples = y.shape[0] 63 | # ---------------------------------------------------------------- 64 | # effect and pvals 65 | # ---------------------------------------------------------------- 66 | effect, trigger = tau_squared_trigger(y, t, self.min_size, self.quartile) 67 | p_val = get_pval_trigger(y, t, trigger) 68 | self.root.effect = effect 69 | self.root.p_val = p_val 70 | self.root.trigger = trigger 71 | 72 | # ---------------------------------------------------------------- 73 | # Not sure if i should eval in root or not 74 | # ---------------------------------------------------------------- 75 | node_eval, trigger, mse = self.adaptive_eval(y, t) 76 | self.root.obj = node_eval 77 | 78 | # ---------------------------------------------------------------- 79 | # Add control/treatment means 80 | # ---------------------------------------------------------------- 81 | self.root.control_mean = np.mean(y[t >= trigger]) 82 | self.root.treatment_mean = np.mean(y[t < trigger]) 83 | 84 | self.root.num_samples = x.shape[0] 85 | 86 | self._fit(self.root, x, y, t) 87 | 88 | def _fit(self, node: AdaptiveTriggerNode, train_x, train_y, train_t): 89 | 90 | if train_x.shape[0] == 0: 91 | return node 92 | 93 | if node.node_depth > self.tree_depth: 94 | self.tree_depth = node.node_depth 95 | 96 | if self.max_depth == self.tree_depth: 97 | if node.effect > self.max_effect: 98 | self.max_effect = node.effect 99 | if node.effect < self.min_effect: 100 | self.min_effect = node.effect 101 | self.num_leaves += 1 102 | node.leaf_num = self.num_leaves 103 | node.is_leaf = True 104 | return node 105 | 106 | best_gain = 0.0 107 | best_attributes = [] 108 | best_tb_obj, best_fb_obj = (0.0, 0.0) 109 | best_tb_trigger, best_fb_trigger = (0.0, 0.0) 110 | 111 | column_count = train_x.shape[1] 112 | for col in range(0, column_count): 113 | unique_vals = np.unique(train_x[:, col]) 114 | 115 | if self.max_values is not None: 116 | if self.max_values < 1: 117 | idx = np.round(np.linspace(0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 118 | unique_vals = unique_vals[idx] 119 | else: 120 | idx = np.round(np.linspace( 121 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 122 | unique_vals = unique_vals[idx] 123 | 124 | for value in unique_vals: 125 | 126 | # check training data size 127 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 128 | = divide_set(train_x, train_y, train_t, col, value) 129 | check1 = check_min_size(self.min_size, train_t1) 130 | check2 = check_min_size(self.min_size, train_t2) 131 | if check1 or check2: 132 | continue 133 | 134 | tb_eval, tb_trigger, tb_mse = self.adaptive_eval(train_y1, train_t1) 135 | fb_eval, fb_trigger, fb_mse = self.adaptive_eval(train_y2, train_t2) 136 | 137 | split_eval = (tb_eval + fb_eval) 138 | gain = -node.obj + split_eval 139 | 140 | if gain > best_gain: 141 | best_gain = gain 142 | best_attributes = [col, value] 143 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 144 | best_tb_trigger, best_fb_trigger = (tb_trigger, fb_trigger) 145 | 146 | if best_gain > 0: 147 | node.col = best_attributes[0] 148 | node.value = best_attributes[1] 149 | 150 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 151 | = divide_set(train_x, train_y, train_t, node.col, node.value) 
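            # (note) this adaptive variant re-estimates leaf effects from the
            # training split alone; ctl_base_trigger.py below instead
            # concatenates the training and validation halves before computing
            # leaf effects and p-values.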
152 | 
153 |             y1 = train_y1
154 |             y2 = train_y2
155 |             t1 = train_t1
156 |             t2 = train_t2
157 | 
158 |             best_tb_effect = ace_trigger(y1, t1, best_tb_trigger)
159 |             best_fb_effect = ace_trigger(y2, t2, best_fb_trigger)
160 |             tb_p_val = get_pval_trigger(y1, t1, best_tb_trigger)
161 |             fb_p_val = get_pval_trigger(y2, t2, best_fb_trigger)
162 | 
163 |             self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj
164 | 
165 |             # ----------------------------------------------------------------
166 |             # Ignore "mse" here, come back to it later?
167 |             # ----------------------------------------------------------------
168 | 
169 |             tb = AdaptiveTriggerNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val,
170 |                                      node_depth=node.node_depth + 1,
171 |                                      num_samples=y1.shape[0], trigger=best_tb_trigger)
172 |             fb = AdaptiveTriggerNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val,
173 |                                      node_depth=node.node_depth + 1,
174 |                                      num_samples=y2.shape[0], trigger=best_fb_trigger)
175 | 
176 |             node.true_branch = self._fit(tb, train_x1, train_y1, train_t1)
177 |             node.false_branch = self._fit(fb, train_x2, train_y2, train_t2)
178 | 
179 |             if node.effect > self.max_effect:
180 |                 self.max_effect = node.effect
181 |             if node.effect < self.min_effect:
182 |                 self.min_effect = node.effect
183 | 
184 |             return node
185 | 
186 |         else:
187 |             if node.effect > self.max_effect:
188 |                 self.max_effect = node.effect
189 |             if node.effect < self.min_effect:
190 |                 self.min_effect = node.effect
191 | 
192 |             self.num_leaves += 1
193 |             node.leaf_num = self.num_leaves
194 |             node.is_leaf = True
195 |             return node
196 | 
--------------------------------------------------------------------------------
/CTL/causal_tree/ctl_trigger/ctl_base_trigger.py:
--------------------------------------------------------------------------------
1 | from CTL.causal_tree.ctl_trigger.trigger_ctl import *
2 | from sklearn.model_selection import train_test_split
3 | 
4 | 
5 | class TriggerBaseNode(TriggerNode):
6 | 
7 |     def __init__(self, **kwargs):
8 |         super().__init__(**kwargs)
9 | 
10 | 
11 | # ----------------------------------------------------------------
12 | # Base causal tree (ctl, base objective)
13 | # ----------------------------------------------------------------
14 | class TriggerTreeBase(TriggerTree):
15 | 
16 |     def __init__(self, **kwargs):
17 |         super().__init__(**kwargs)
18 |         self.root = TriggerBaseNode()
19 | 
20 |     def fit(self, x, y, t):
21 |         if x.shape[0] == 0:
22 |             return 0
23 | 
24 |         # ----------------------------------------------------------------
25 |         # Seed
26 |         # ----------------------------------------------------------------
27 |         np.random.seed(self.seed)
28 | 
29 |         # ----------------------------------------------------------------
30 |         # Verbosity?
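        # (note) _eval for trigger trees comes from trigger_ctl.py (not shown)
        # and returns a triple (objective, chosen trigger, mse), unpacked at
        # the root evaluation below, whereas the binary trees' _eval returns
        # the pair (objective, mse).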
31 | # ---------------------------------------------------------------- 32 | 33 | # ---------------------------------------------------------------- 34 | # Split data 35 | # ---------------------------------------------------------------- 36 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 37 | test_size=self.val_split) 38 | self.root.num_samples = y.shape[0] 39 | # ---------------------------------------------------------------- 40 | # effect and pvals 41 | # ---------------------------------------------------------------- 42 | effect, trigger = tau_squared_trigger(y, t, self.min_size, self.quartile) 43 | p_val = get_pval_trigger(y, t, trigger) 44 | self.root.effect = effect 45 | self.root.p_val = p_val 46 | self.root.trigger = trigger 47 | 48 | # ---------------------------------------------------------------- 49 | # Not sure if i should eval in root or not 50 | # ---------------------------------------------------------------- 51 | node_eval, trigger, mse = self._eval(train_y, train_t, val_y, val_t) 52 | self.root.obj = node_eval 53 | 54 | # ---------------------------------------------------------------- 55 | # Add control/treatment means 56 | # ---------------------------------------------------------------- 57 | self.root.control_mean = np.mean(y[t >= trigger]) 58 | self.root.treatment_mean = np.mean(y[t < trigger]) 59 | 60 | self.root.num_samples = x.shape[0] 61 | 62 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 63 | 64 | def _fit(self, node: TriggerBaseNode, train_x, train_y, train_t, val_x, val_y, val_t): 65 | 66 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 67 | return node 68 | 69 | if node.node_depth > self.tree_depth: 70 | self.tree_depth = node.node_depth 71 | 72 | if self.max_depth == self.tree_depth: 73 | if node.effect > self.max_effect: 74 | self.max_effect = node.effect 75 | if node.effect < self.min_effect: 76 | self.min_effect = node.effect 77 | self.num_leaves += 1 78 | node.leaf_num = self.num_leaves 79 | node.is_leaf = True 80 | return node 81 | 82 | best_gain = 0.0 83 | best_attributes = [] 84 | best_tb_obj, best_fb_obj = (0.0, 0.0) 85 | best_tb_trigger, best_fb_trigger = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | if self.max_values is not None: 92 | if self.max_values < 1: 93 | idx = np.round(np.linspace(0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 94 | unique_vals = unique_vals[idx] 95 | else: 96 | idx = np.round(np.linspace( 97 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 98 | unique_vals = unique_vals[idx] 99 | 100 | for value in unique_vals: 101 | 102 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 103 | = divide_set(val_x, val_y, val_t, col, value) 104 | 105 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 106 | = divide_set(train_x, train_y, train_t, col, value) 107 | 108 | tb_eval, tb_trigger, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 109 | fb_eval, fb_trigger, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 110 | 111 | split_eval = (tb_eval + fb_eval) 112 | gain = -node.obj + split_eval 113 | 114 | if gain > best_gain: 115 | best_gain = gain 116 | best_attributes = [col, value] 117 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 118 | best_tb_trigger, best_fb_trigger = (tb_trigger, fb_trigger) 119 | 120 | if best_gain > 0: 121 | node.col = best_attributes[0] 122 | node.value 
= best_attributes[1] 123 | 124 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 125 | = divide_set(train_x, train_y, train_t, node.col, node.value) 126 | 127 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 128 | = divide_set(val_x, val_y, val_t, node.col, node.value) 129 | 130 | y1 = np.concatenate((train_y1, val_y1)) 131 | y2 = np.concatenate((train_y2, val_y2)) 132 | t1 = np.concatenate((train_t1, val_t1)) 133 | t2 = np.concatenate((train_t2, val_t2)) 134 | 135 | best_tb_effect = ace_trigger(y1, t1, best_tb_trigger) 136 | best_fb_effect = ace_trigger(y2, t2, best_fb_trigger) 137 | tb_p_val = get_pval_trigger(y1, t1, best_tb_trigger) 138 | fb_p_val = get_pval_trigger(y2, t2, best_fb_trigger) 139 | 140 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 141 | 142 | # ---------------------------------------------------------------- 143 | # Ignore "mse" here, come back to it later? 144 | # ---------------------------------------------------------------- 145 | 146 | tb = TriggerBaseNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 147 | node_depth=node.node_depth + 1, 148 | num_samples=y1.shape[0], trigger=best_tb_trigger) 149 | fb = TriggerBaseNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 150 | node_depth=node.node_depth + 1, 151 | num_samples=y2.shape[0], trigger=best_fb_trigger) 152 | 153 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 154 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 155 | 156 | if node.effect > self.max_effect: 157 | self.max_effect = node.effect 158 | if node.effect < self.min_effect: 159 | self.min_effect = node.effect 160 | 161 | return node 162 | 163 | else: 164 | if node.effect > self.max_effect: 165 | self.max_effect = node.effect 166 | if node.effect < self.min_effect: 167 | self.min_effect = node.effect 168 | 169 | self.num_leaves += 1 170 | node.leaf_num = self.num_leaves 171 | node.is_leaf = True 172 | return node 173 | -------------------------------------------------------------------------------- /CTL/causal_tree/nn_pehe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/nn_pehe/__init__.py -------------------------------------------------------------------------------- /CTL/causal_tree/nn_pehe/balance_split.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.nn_pehe.tree import * 2 | 3 | 4 | class BaseNode(PEHENode): 5 | 6 | def __init__(self, **kwargs): 7 | super().__init__(**kwargs) 8 | 9 | # self.obj = obj 10 | 11 | 12 | # ---------------------------------------------------------------- 13 | # Base causal tree (ctl, base objective) 14 | # ---------------------------------------------------------------- 15 | class BalanceBasePEHE(PEHETree): 16 | 17 | def __init__(self, eval2=False, **kwargs): 18 | super().__init__(**kwargs) 19 | self.root = BaseNode() 20 | self.eval2 = eval2 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | self.root.num_samples = y.shape[0] 32 | self.num_training = y.shape[0] 33 | 34 | # ---------------------------------------------------------------- 35 | # NN_effect estimates 
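        # (note) nn_effect holds one treatment-effect proxy per training row:
        # the mean outcome of its k nearest treated neighbors minus that of its
        # k nearest control neighbors; _eval then sums the squared gap between
        # a node's pooled effect estimate and these matched estimates.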
36 | # use the overall datasets for nearest neighbor for now 37 | # ---------------------------------------------------------------- 38 | nn_effect = self.compute_nn_effect(x, y, t, k=self.k) 39 | 40 | # ---------------------------------------------------------------- 41 | # effect and pvals 42 | # ---------------------------------------------------------------- 43 | effect = tau_squared(y, t) 44 | p_val = get_pval(y, t) 45 | self.root.effect = effect 46 | self.root.p_val = p_val 47 | 48 | # ---------------------------------------------------------------- 49 | # Not sure if i should eval in root or not 50 | # ---------------------------------------------------------------- 51 | nn_pehe = self._eval(y, t, nn_effect) 52 | self.root.pehe = nn_pehe 53 | self.pehe = self.root.pehe 54 | 55 | # ---------------------------------------------------------------- 56 | # Add control/treatment means 57 | # ---------------------------------------------------------------- 58 | self.root.control_mean = np.mean(y[t == 0]) 59 | self.root.treatment_mean = np.mean(y[t == 1]) 60 | 61 | self.root.num_samples = x.shape[0] 62 | 63 | self._fit(self.root, x, y, t, nn_effect) 64 | 65 | if self.num_leaves > 0: 66 | self.pehe = self.pehe / self.num_leaves 67 | 68 | def _eval(self, train_y, train_t, nn_effect): 69 | 70 | # treated = np.where(train_t == 1)[0] 71 | # control = np.where(train_t == 0)[0] 72 | # pred_effect = np.mean(train_y[treated]) - np.mean(train_y[control]) 73 | pred_effect = ace(train_y, train_t) 74 | 75 | # nn_pehe = np.mean((nn_effect - pred_effect) ** 2) 76 | nn_pehe = np.sum((nn_effect - pred_effect) ** 2) 77 | 78 | return nn_pehe 79 | 80 | def _fit(self, node: BaseNode, train_x, train_y, train_t, nn_effect): 81 | 82 | if train_x.shape[0] == 0: 83 | return node 84 | 85 | if node.node_depth > self.tree_depth: 86 | self.tree_depth = node.node_depth 87 | 88 | if self.max_depth == self.tree_depth: 89 | self.num_leaves += 1 90 | node.leaf_num = self.num_leaves 91 | node.is_leaf = True 92 | return node 93 | 94 | # print(self.tree_depth, self.obj) 95 | 96 | best_gain = 0.0 97 | # best_gain = node.pehe # min amount 98 | best_attributes = [] 99 | best_tb_obj, best_fb_obj = (0.0, 0.0) 100 | 101 | column_count = train_x.shape[1] 102 | for col in range(0, column_count): 103 | unique_vals = np.unique(train_x[:, col]) 104 | 105 | for value in unique_vals: 106 | # check training data size 107 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 108 | = divide_set(train_x, train_y, train_t, col, value) 109 | check1 = check_min_size(self.min_size, train_t1) 110 | check2 = check_min_size(self.min_size, train_t2) 111 | if check1 or check2: 112 | continue 113 | (_, _, nn_effect1, nn_effect2, _, _) \ 114 | = divide_set(train_x, nn_effect, train_t, col, value) 115 | 116 | tb_eval = self._eval(train_y1, train_t1, nn_effect1) 117 | fb_eval = self._eval(train_y2, train_t2, nn_effect2) 118 | 119 | split_difference = np.abs(tb_eval - fb_eval) 120 | 121 | split_eval = (tb_eval + fb_eval) 122 | gain = node.pehe - split_eval - split_difference 123 | 124 | if gain > best_gain: 125 | best_gain = gain 126 | best_attributes = [col, value] 127 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 128 | # if self.eval2: 129 | # split_eval, value, tb_eval, fb_eval = self._eval2(unique_vals, train_x, train_y, train_t, nn_effect, 130 | # col, node.pehe) 131 | # 132 | # gain = node.pehe - split_eval 133 | # 134 | # if gain > best_gain: 135 | # best_gain = gain 136 | # best_attributes = [col, value] 137 | # best_tb_obj, best_fb_obj 
= (tb_eval, fb_eval) 138 | # else: 139 | # for value in unique_vals: 140 | # # check training data size 141 | # (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 142 | # = divide_set(train_x, train_y, train_t, col, value) 143 | # check1 = check_min_size(self.min_size, train_t1) 144 | # check2 = check_min_size(self.min_size, train_t2) 145 | # if check1 or check2: 146 | # continue 147 | # (_, _, nn_effect1, nn_effect2, _, _) \ 148 | # = divide_set(train_x, nn_effect, train_t, col, value) 149 | # 150 | # tb_eval = self._eval(train_y1, train_t1, nn_effect1) 151 | # fb_eval = self._eval(train_y2, train_t2, nn_effect2) 152 | # 153 | # split_eval = (tb_eval + fb_eval) 154 | # gain = node.pehe - split_eval 155 | # 156 | # if gain > best_gain: 157 | # best_gain = gain 158 | # best_attributes = [col, value] 159 | # best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 160 | 161 | if best_gain > 0: 162 | node.col = best_attributes[0] 163 | node.value = best_attributes[1] 164 | 165 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 166 | = divide_set(train_x, train_y, train_t, node.col, node.value) 167 | (_, _, nn_effect1, nn_effect2, _, _) \ 168 | = divide_set(train_x, nn_effect, train_t, node.col, node.value) 169 | 170 | y1 = train_y1 171 | y2 = train_y2 172 | t1 = train_t1 173 | t2 = train_t2 174 | 175 | best_tb_effect = ace(y1, t1) 176 | best_fb_effect = ace(y2, t2) 177 | tb_p_val = get_pval(y1, t1) 178 | fb_p_val = get_pval(y2, t2) 179 | 180 | self.pehe = self.pehe - node.pehe + best_tb_obj + best_fb_obj 181 | 182 | tb = BaseNode(obj=best_tb_obj, pehe=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 183 | node_depth=node.node_depth + 1, 184 | num_samples=y1.shape[0]) 185 | fb = BaseNode(obj=best_fb_obj, pehe=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 186 | node_depth=node.node_depth + 1, 187 | num_samples=y2.shape[0]) 188 | 189 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, nn_effect1) 190 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, nn_effect2) 191 | 192 | if node.effect > self.max_effect: 193 | self.max_effect = node.effect 194 | if node.effect < self.min_effect: 195 | self.min_effect = node.effect 196 | 197 | return node 198 | 199 | else: 200 | if node.effect > self.max_effect: 201 | self.max_effect = node.effect 202 | if node.effect < self.min_effect: 203 | self.min_effect = node.effect 204 | 205 | self.num_leaves += 1 206 | node.leaf_num = self.num_leaves 207 | node.is_leaf = True 208 | return node 209 | -------------------------------------------------------------------------------- /CTL/causal_tree/nn_pehe/honest.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.nn_pehe.tree import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class HonestNode(PEHENode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class HonestPEHE(PEHETree): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = HonestNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 
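        # (note) honest split: train_test_split below reserves half of the data
        # (est_x, est_y, est_t) purely for leaf effect estimation, so tree
        # structure is chosen on samples disjoint from those used for estimates.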
30 | 31 | # ---------------------------------------------------------------- 32 | # Split data 33 | # ---------------------------------------------------------------- 34 | x, est_x, y, est_y, t, est_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 35 | test_size=0.5) 36 | self.root.num_samples = est_y.shape[0] 37 | self.num_training = y.shape[0] 38 | 39 | # ---------------------------------------------------------------- 40 | # NN_effect estimates 41 | # use the overall datasets for nearest neighbor for now 42 | # ---------------------------------------------------------------- 43 | nn_effect = compute_nn_effect(x, y, t, k=self.k) 44 | # val_nn_effect = compute_nn_effect(est_x, est_y, est_t, k=self.k) 45 | 46 | # ---------------------------------------------------------------- 47 | # effect and pvals 48 | # ---------------------------------------------------------------- 49 | effect = tau_squared(y, t) 50 | p_val = get_pval(y, t) 51 | self.root.effect = effect 52 | self.root.p_val = p_val 53 | 54 | # ---------------------------------------------------------------- 55 | # Not sure if i should eval in root or not 56 | # ---------------------------------------------------------------- 57 | nn_pehe = self._eval(y, t, nn_effect) 58 | self.root.obj = nn_pehe 59 | self.obj = self.root.obj 60 | 61 | # ---------------------------------------------------------------- 62 | # Add control/treatment means 63 | # ---------------------------------------------------------------- 64 | self.root.control_mean = np.mean(y[t == 0]) 65 | self.root.treatment_mean = np.mean(y[t == 1]) 66 | 67 | self.root.num_samples = x.shape[0] 68 | 69 | self._fit(self.root, x, y, t, nn_effect, est_x, est_y, est_t) 70 | 71 | if self.num_leaves > 0: 72 | self.obj = self.obj / self.num_leaves 73 | 74 | def _eval(self, train_y, train_t, nn_effect): 75 | 76 | # total_train = train_y.shape[0] 77 | 78 | # treated = np.where(train_t == 1)[0] 79 | # control = np.where(train_t == 0)[0] 80 | # pred_effect = np.mean(train_y[treated]) - np.mean(train_y[control]) 81 | pred_effect = ace(train_y, train_t) 82 | 83 | # nn_pehe = np.mean((nn_effect - pred_effect) ** 2) 84 | nn_pehe = np.sum((nn_effect - pred_effect) ** 2) 85 | 86 | # val_effect = ace(val_y, val_t) 87 | # val_nn_pehe = np.sum((val_nn_effect - pred_effect) ** 2) 88 | # val_train_ratio = total_train / total_val 89 | # val_nn_pehe = val_nn_pehe * val_train_ratio 90 | # pehe_diff = np.abs(nn_pehe - val_nn_pehe) 91 | 92 | # cost = np.abs(total_train * pred_effect - total_train * val_effect) 93 | 94 | var_t, var_c = variance(train_y, train_t) 95 | 96 | return nn_pehe 97 | 98 | def _fit(self, node: HonestNode, train_x, train_y, train_t, nn_effect, est_x, est_y, est_t): 99 | 100 | if train_x.shape[0] == 0: 101 | return node 102 | 103 | if node.node_depth > self.tree_depth: 104 | self.tree_depth = node.node_depth 105 | 106 | if self.max_depth == self.tree_depth: 107 | if node.effect > self.max_effect: 108 | self.max_effect = node.effect 109 | if node.effect < self.min_effect: 110 | self.min_effect = node.effect 111 | self.num_leaves += 1 112 | node.leaf_num = self.num_leaves 113 | node.is_leaf = True 114 | return node 115 | 116 | # print(self.tree_depth, self.obj) 117 | 118 | best_gain = 0.0 119 | best_attributes = [] 120 | best_tb_obj, best_fb_obj = (0.0, 0.0) 121 | 122 | column_count = train_x.shape[1] 123 | for col in range(0, column_count): 124 | unique_vals = np.unique(train_x[:, col]) 125 | 126 | for value in unique_vals: 127 | (est_x1, est_x2, est_y1, est_y2, 
est_t1, est_t2) \ 128 | = divide_set(est_x, est_y, est_t, col, value) 129 | 130 | # check est set size 131 | if check_min_size(self.min_size, est_t1) or check_min_size(self.min_size, est_t2): 132 | continue 133 | 134 | # check training data size 135 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 136 | = divide_set(train_x, train_y, train_t, col, value) 137 | check1 = check_min_size(self.min_size, train_t1) 138 | check2 = check_min_size(self.min_size, train_t2) 139 | if check1 or check2: 140 | continue 141 | (_, _, nn_effect1, nn_effect2, _, _) \ 142 | = divide_set(train_x, nn_effect, train_t, col, value) 143 | 144 | tb_eval = self._eval(train_y1, train_t1, nn_effect1) 145 | fb_eval = self._eval(train_y2, train_t2, nn_effect2) 146 | 147 | split_eval = (tb_eval + fb_eval) 148 | gain = node.obj - split_eval 149 | 150 | if gain > best_gain: 151 | best_gain = gain 152 | best_attributes = [col, value] 153 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 154 | 155 | # print(tb_eval, fb_eval, gain, best_gain) 156 | 157 | if best_gain > 0: 158 | node.col = best_attributes[0] 159 | node.value = best_attributes[1] 160 | 161 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 162 | = divide_set(train_x, train_y, train_t, node.col, node.value) 163 | (est_x1, est_x2, est_y1, est_y2, est_t1, est_t2) \ 164 | = divide_set(est_x, est_y, est_t, node.col, node.value) 165 | (_, _, nn_effect1, nn_effect2, _, _) \ 166 | = divide_set(train_x, nn_effect, train_t, node.col, node.value) 167 | 168 | # y1 = train_y1 169 | # y2 = train_y2 170 | # t1 = train_t1 171 | # t2 = train_t2 172 | # y1 = np.concatenate((train_y1, val_y1)) 173 | # y2 = np.concatenate((train_y2, val_y2)) 174 | # t1 = np.concatenate((train_t1, val_t1)) 175 | # t2 = np.concatenate((train_t2, val_t2)) 176 | y1 = est_y1 177 | y2 = est_y2 178 | t1 = est_t1 179 | t2 = est_t2 180 | 181 | best_tb_effect = ace(y1, t1) 182 | best_fb_effect = ace(y2, t2) 183 | tb_p_val = get_pval(y1, t1) 184 | fb_p_val = get_pval(y2, t2) 185 | 186 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 187 | 188 | tb = HonestNode(obj=best_tb_obj, pehe=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 189 | node_depth=node.node_depth + 1, 190 | num_samples=train_y1.shape[0]) 191 | fb = HonestNode(obj=best_fb_obj, pehe=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 192 | node_depth=node.node_depth + 1, 193 | num_samples=train_y2.shape[0]) 194 | 195 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, nn_effect1, est_x1, est_y1, est_t1) 196 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, nn_effect2, est_x2, est_y2, est_t2) 197 | 198 | if node.effect > self.max_effect: 199 | self.max_effect = node.effect 200 | if node.effect < self.min_effect: 201 | self.min_effect = node.effect 202 | 203 | return node 204 | 205 | else: 206 | if node.effect > self.max_effect: 207 | self.max_effect = node.effect 208 | if node.effect < self.min_effect: 209 | self.min_effect = node.effect 210 | 211 | self.num_leaves += 1 212 | node.leaf_num = self.num_leaves 213 | node.is_leaf = True 214 | return node 215 | -------------------------------------------------------------------------------- /CTL/causal_tree/nn_pehe/tree.py: -------------------------------------------------------------------------------- 1 | try: 2 | from CTL.causal_tree.util_c import * 3 | except: 4 | from CTL.causal_tree.util import * 5 | from CTL.causal_tree.ct import * 6 | import numpy as np 7 | from scipy.spatial import cKDTree 8 | 9 | 10 | # TODO: Add 
weighting on evaluations
11 | # TODO: add weighting on k > 1 nearest neighbors?
12 | 
13 | def compute_nn_effect(x, y, t, k=1):
14 |     kdtree = cKDTree(x)
15 |     d, idx = kdtree.query(x, k=x.shape[0])
16 |     idx = idx[:, 1:]
17 |     treated = np.where(t == 1)[0]
18 |     control = np.where(t == 0)[0]
19 |     bool_treated = np.isin(idx, treated)
20 |     bool_control = np.isin(idx, control)
21 | 
22 |     nn_effect = np.zeros(x.shape[0])
23 |     for i in range(len(bool_treated)):
24 |         i_treat_idx = np.where(bool_treated[i, :])[0][:k]
25 |         i_control_idx = np.where(bool_control[i, :])[0][:k]
26 | 
27 |         i_treat_nn = y[idx[i, i_treat_idx]]
28 |         i_cont_nn = y[idx[i, i_control_idx]]
29 | 
30 |         nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn)
31 | 
32 |     return nn_effect
33 | 
34 | 
35 | class PEHENode(CTNode):
36 | 
37 |     def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1,
38 |                  is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0, pehe=0.0):
39 |         super().__init__()
40 |         # not tree specific features (most likely added at creation)
41 |         self.p_val = p_val
42 |         self.effect = effect
43 |         self.node_depth = node_depth
44 |         self.control_mean = control_mean
45 |         self.treatment_mean = treatment_mean
46 | 
47 |         # during tree building
48 |         self.obj = obj
49 |         self.num_samples = num_samples
50 |         self.pehe = pehe
51 | 
52 |         # after building tree
53 |         self.col = col
54 |         self.value = value
55 |         self.is_leaf = is_leaf
56 |         self.leaf_num = leaf_num
57 |         self.true_branch = None
58 |         self.false_branch = None
59 | 
60 |         # after calling functions
61 |         self.column_name = ""
62 |         self.decision = ""
63 | 
64 | 
65 | class PEHETree(CausalTree):
66 | 
67 |     def __init__(self, split_size=0.5, max_depth=-1, min_size=2, max_values=None, verbose=False,
68 |                  k=1, use_propensity=False, propensity_model=None,
69 |                  seed=724):
70 |         super().__init__()
71 |         self.val_split = split_size
72 |         self.max_depth = max_depth
73 |         self.min_size = min_size
74 |         self.seed = seed
75 | 
76 |         self.max_values = max_values
77 |         self.verbose = verbose
78 | 
79 |         self.max_effect = 0.0
80 |         self.min_effect = 0.0
81 | 
82 |         self.features = None
83 | 
84 |         self.k = k
85 |         self.num_training = 1
86 |         self.pehe = 0
87 |         self.use_propensity = use_propensity
88 |         if use_propensity:
89 |             if propensity_model is not None:
90 |                 self.propensity_model = propensity_model
91 |             else:
92 |                 from sklearn.linear_model import LogisticRegression
93 |                 self.propensity_model = LogisticRegression()
94 | 
95 |         self.root = PEHENode()
96 | 
97 |     def compute_nn_effect(self, x, y, t, k=1):
98 |         if self.use_propensity:
99 |             self.propensity_model.fit(x, t)
100 |             propensity = self.propensity_model.predict_proba(x)[:, 1:]
101 |             kdtree = cKDTree(propensity)
102 |             _, idx = kdtree.query(propensity, k=x.shape[0])
103 |         else:
104 |             kdtree = cKDTree(x)
105 |             _, idx = kdtree.query(x, k=x.shape[0])
106 |         idx = idx[:, 1:]
107 |         treated = np.where(t == 1)[0]
108 |         control = np.where(t == 0)[0]
109 |         bool_treated = np.isin(idx, treated)
110 |         bool_control = np.isin(idx, control)
111 | 
112 |         nn_effect = np.zeros(x.shape[0])
113 |         for i in range(len(bool_treated)):
114 |             i_treat_idx = np.where(bool_treated[i, :])[0][:k]
115 |             i_control_idx = np.where(bool_control[i, :])[0][:k]
116 | 
117 |             i_treat_nn = y[idx[i, i_treat_idx]]
118 |             i_cont_nn = y[idx[i, i_control_idx]]
119 | 
120 |             nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn)
121 | 
122 |         return nn_effect
123 | 
124 |     @abstractmethod
125 |     def fit(self, x, y, t):
126 |         pass
127 | 
128 |     def predict(self, x):
129 | 
130 |         def
_predict(node: PEHENode, observation): 131 | if node.is_leaf: 132 | return node.effect 133 | else: 134 | v = observation[node.col] 135 | if v >= node.value: 136 | branch = node.true_branch 137 | else: 138 | branch = node.false_branch 139 | 140 | return _predict(branch, observation) 141 | 142 | if len(x.shape) == 1: 143 | prediction = _predict(self.root, x) 144 | return prediction 145 | 146 | num_test = x.shape[0] 147 | 148 | prediction = np.zeros(num_test) 149 | 150 | for i in range(num_test): 151 | test_example = x[i, :] 152 | prediction[i] = _predict(self.root, test_example) 153 | 154 | return prediction 155 | 156 | def get_groups(self, x): 157 | 158 | def _get_group(node: PEHENode, observation): 159 | if node.is_leaf: 160 | return node.leaf_num 161 | else: 162 | v = observation[node.col] 163 | if v >= node.value: 164 | branch = node.true_branch 165 | else: 166 | branch = node.false_branch 167 | 168 | return _get_group(branch, observation) 169 | 170 | if len(x.shape) == 1: 171 | return _get_group(self.root, x) 172 | num_test = x.shape[0] 173 | leaf_results = np.zeros(num_test) 174 | 175 | for i in range(num_test): 176 | test_example = x[i, :] 177 | leaf_results[i] = _get_group(self.root, test_example) 178 | 179 | return leaf_results 180 | 181 | def get_features(self, x): 182 | 183 | def _get_features(node: PEHENode, observation, features): 184 | if node.is_leaf: 185 | return features 186 | else: 187 | v = observation[node.col] 188 | if v >= node.value: 189 | branch = node.true_branch 190 | else: 191 | branch = node.false_branch 192 | 193 | features.append(node.decision) 194 | return _get_features(branch, observation, features) 195 | 196 | if len(x.shape) == 1: 197 | features = [] 198 | return _get_features(self.root, x, features) 199 | num_test = x.shape[0] 200 | leaf_features = [] 201 | 202 | for i in range(num_test): 203 | features = [] 204 | test_example = x[i, :] 205 | leaf_features.append(_get_features(self.root, test_example, features)) 206 | 207 | return leaf_features 208 | 209 | def prune(self, alpha=0.05): 210 | 211 | def _prune(node: PEHENode): 212 | if node.true_branch is None or node.false_branch is None: 213 | return 214 | 215 | # recursive call for each branch 216 | if not node.true_branch.is_leaf: 217 | _prune(node.true_branch) 218 | if not node.false_branch.is_leaf: 219 | _prune(node.false_branch) 220 | 221 | # merge leaves (potentially) 222 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 223 | # Get branches 224 | tb = node.true_branch 225 | fb = node.false_branch 226 | 227 | tb_pval = tb.p_val 228 | fb_pval = fb.p_val 229 | 230 | if tb_pval > alpha and fb_pval > alpha: 231 | node.leaf_num = node.true_branch.leaf_num 232 | node.true_branch = None 233 | node.false_branch = None 234 | self.num_leaves = self.num_leaves - 1 235 | node.is_leaf = True 236 | 237 | # ---------------------------------------------------------------- 238 | # Something about obj/mse? if that is added 239 | # 240 | # - can do a self function so that tree references itself/it's own type of node? 
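
The recursive `_predict` helper above walks left or right on `x[col] >= value` until it reaches a leaf, which stores the estimated effect. A minimal sketch of that walk on a hand-built two-leaf stump (the tree and its effect values are made up for illustration; the node fields come from the `PEHENode` definition in this file):

```python
import numpy as np
from CTL.causal_tree.nn_pehe.tree import PEHENode

# hand-built stump: split on feature 0 at 0.5; effects are made-up numbers
root = PEHENode(col=0, value=0.5)
root.true_branch = PEHENode(effect=1.2, is_leaf=True, leaf_num=1)    # x[0] >= 0.5
root.false_branch = PEHENode(effect=-0.3, is_leaf=True, leaf_num=2)  # x[0] <  0.5

def predict_one(node, obs):
    # mirrors the inner _predict: leaves store the estimated treatment effect
    if node.is_leaf:
        return node.effect
    branch = node.true_branch if obs[node.col] >= node.value else node.false_branch
    return predict_one(branch, obs)

print(predict_one(root, np.array([0.9])))  # 1.2
print(predict_one(root, np.array([0.1])))  # -0.3
```
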
241 |                     # ----------------------------------------------------------------
242 |                     if tb.node_depth == self.tree_depth:
243 |                         self.tree_depth = self.tree_depth - 1
244 | 
245 |         _prune(self.root)
246 | 
247 |     def get_triggers(self, x):
248 |         pass
249 | 
250 |     def save(self, filename):
251 |         import pickle as pkl
252 | 
253 |         check_dir(filename)
254 |         with open(filename, "wb") as file:
255 |             pkl.dump(self, file)
256 | 
--------------------------------------------------------------------------------
/CTL/causal_tree/r_tree/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/r_tree/__init__.py
--------------------------------------------------------------------------------
/CTL/causal_tree/r_tree/tree.py:
--------------------------------------------------------------------------------
1 | try:
2 |     from CTL.causal_tree.util_c import *
3 | except:
4 |     from CTL.causal_tree.util import *
5 | from CTL.causal_tree.ct import *
6 | import numpy as np
7 | from scipy.spatial import cKDTree
8 | 
9 | 
10 | # TODO: Add weighting on evaluations
11 | # TODO: add weighting on k > 1 nearest neighbors?
12 | 
13 | def compute_nn_effect(x, y, t, k=1):
14 |     kdtree = cKDTree(x)
15 |     d, idx = kdtree.query(x, k=x.shape[0])
16 |     idx = idx[:, 1:]
17 |     treated = np.where(t == 1)[0]
18 |     control = np.where(t == 0)[0]
19 |     bool_treated = np.isin(idx, treated)
20 |     bool_control = np.isin(idx, control)
21 | 
22 |     nn_effect = np.zeros(x.shape[0])
23 |     for i in range(len(bool_treated)):
24 |         i_treat_idx = np.where(bool_treated[i, :])[0][:k]
25 |         i_control_idx = np.where(bool_control[i, :])[0][:k]
26 | 
27 |         i_treat_nn = y[idx[i, i_treat_idx]]
28 |         i_cont_nn = y[idx[i, i_control_idx]]
29 | 
30 |         nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn)
31 | 
32 |     return nn_effect
33 | 
34 | 
35 | class RNode(CTNode):
36 | 
37 |     def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1,
38 |                  is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0, pehe=0.0):
39 |         super().__init__()
40 |         # not tree specific features (most likely added at creation)
41 |         self.p_val = p_val
42 |         self.effect = effect
43 |         self.node_depth = node_depth
44 |         self.control_mean = control_mean
45 |         self.treatment_mean = treatment_mean
46 | 
47 |         # during tree building
48 |         self.obj = obj
49 |         self.num_samples = num_samples
50 |         self.pehe = pehe
51 | 
52 |         # after building tree
53 |         self.col = col
54 |         self.value = value
55 |         self.is_leaf = is_leaf
56 |         self.leaf_num = leaf_num
57 |         self.true_branch = None
58 |         self.false_branch = None
59 | 
60 |         # after calling functions
61 |         self.column_name = ""
62 |         self.decision = ""
63 | 
64 | 
65 | class RTree(CausalTree):
66 | 
67 |     def __init__(self, split_size=0.5, max_depth=-1, min_size=2, max_values=None, verbose=False,
68 |                  k=1, use_propensity=False, propensity_model=None,
69 |                  seed=724):
70 |         super().__init__()
71 |         self.val_split = split_size
72 |         self.max_depth = max_depth
73 |         self.min_size = min_size
74 |         self.seed = seed
75 | 
76 |         self.max_values = max_values
77 |         self.verbose = verbose
78 | 
79 |         self.max_effect = 0.0
80 |         self.min_effect = 0.0
81 | 
82 |         self.features = None
83 | 
84 |         self.k = k
85 |         self.num_training = 1
86 |         self.pehe = 0
87 |         self.use_propensity = use_propensity
88 |         if use_propensity:
89 |             if propensity_model is not None:
90 |                 self.propensity_model = propensity_model
91 |             else:
92 |                 from sklearn.linear_model import LogisticRegression
93 |                 self.propensity_model = LogisticRegression()
94 | 
95 |         self.root = RNode()
96 | 
97 |     def compute_nn_effect(self, x, y, t, k=1):
98 |         if self.use_propensity:
99 |             self.propensity_model.fit(x, t)
100 |             propensity = self.propensity_model.predict_proba(x)[:, 1:]
101 |             kdtree = cKDTree(propensity)
102 |             _, idx = kdtree.query(propensity, k=x.shape[0])
103 |         else:
104 |             kdtree = cKDTree(x)
105 |             _, idx = kdtree.query(x, k=x.shape[0])
106 |         idx = idx[:, 1:]
107 |         treated = np.where(t == 1)[0]
108 |         control = np.where(t == 0)[0]
109 |         bool_treated = np.isin(idx, treated)
110 |         bool_control = np.isin(idx, control)
111 | 
112 |         nn_effect = np.zeros(x.shape[0])
113 |         for i in range(len(bool_treated)):
114 |             i_treat_idx = np.where(bool_treated[i, :])[0][:k]
115 |             i_control_idx = np.where(bool_control[i, :])[0][:k]
116 | 
117 |             i_treat_nn = y[idx[i, i_treat_idx]]
118 |             i_cont_nn = y[idx[i, i_control_idx]]
119 | 
120 |             nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn)
121 | 
122 |         return nn_effect
123 | 
124 |     @abstractmethod
125 |     def fit(self, x, y, t):
126 |         pass
127 | 
128 |     def predict(self, x):
129 | 
130 |         def _predict(node: RNode, observation):
131 |             if node.is_leaf:
132 |                 return node.effect
133 |             else:
134 |                 v = observation[node.col]
135 |                 if v >= node.value:
136 |                     branch = node.true_branch
137 |                 else:
138 |                     branch = node.false_branch
139 | 
140 |             return _predict(branch, observation)
141 | 
142 |         if len(x.shape) == 1:
143 |             prediction = _predict(self.root, x)
144 |             return prediction
145 | 
146 |         num_test = x.shape[0]
147 | 
148 |         prediction = np.zeros(num_test)
149 | 
150 |         for i in range(num_test):
151 |             test_example = x[i, :]
152 |             prediction[i] = _predict(self.root, test_example)
153 | 
154 |         return prediction
155 | 
156 |     def get_groups(self, x):
157 | 
158 |         def _get_group(node: RNode, observation):
159 |             if node.is_leaf:
160 |                 return node.leaf_num
161 |             else:
162 |                 v = observation[node.col]
163 |                 if v >= node.value:
164 |                     branch = node.true_branch
165 |                 else:
166 |                     branch = node.false_branch
167 | 
168 |             return _get_group(branch, observation)
169 | 
170 |         if len(x.shape) == 1:
171 |             return _get_group(self.root, x)
172 |         num_test = x.shape[0]
173 |         leaf_results = np.zeros(num_test)
174 | 
175 |         for i in range(num_test):
176 |             test_example = x[i, :]
177 |             leaf_results[i] = _get_group(self.root, test_example)
178 | 
179 |         return leaf_results
180 | 
181 |     def get_features(self, x):
182 | 
183 |         def _get_features(node: RNode, observation, features):
184 |             if node.is_leaf:
185 |                 return features
186 |             else:
187 |                 v = observation[node.col]
188 |                 if v >= node.value:
189 |                     branch = node.true_branch
190 |                 else:
191 |                     branch = node.false_branch
192 | 
193 |                 features.append(node.decision)
194 |                 return _get_features(branch, observation, features)
195 | 
196 |         if len(x.shape) == 1:
197 |             features = []
198 |             return _get_features(self.root, x, features)
199 |         num_test = x.shape[0]
200 |         leaf_features = []
201 | 
202 |         for i in range(num_test):
203 |             features = []
204 |             test_example = x[i, :]
205 |             leaf_features.append(_get_features(self.root, test_example, features))
206 | 
207 |         return leaf_features
208 | 
209 |     def prune(self, alpha=0.05):
210 | 
211 |         def _prune(node: RNode):
212 |             if node.true_branch is None or node.false_branch is None:
213 |                 return
214 | 
215 |             # recursive call for each branch
216 |             if not node.true_branch.is_leaf:
217 |                 _prune(node.true_branch)
218 |             if not node.false_branch.is_leaf:
219 |                 _prune(node.false_branch)
220 | 
221 |             # merge leaves (potentially)
222 |             if node.true_branch.is_leaf and node.false_branch.is_leaf:
223 |                 # Get branches
224 |                 tb = node.true_branch
225 |                 fb = node.false_branch
226 | 
227 |                 tb_pval = tb.p_val
228 |                 fb_pval = fb.p_val
229 | 
230 |                 if tb_pval > alpha and fb_pval > alpha:
231 |                     node.leaf_num = node.true_branch.leaf_num
232 |                     node.true_branch = None
233 |                     node.false_branch = None
234 |                     self.num_leaves = self.num_leaves - 1
235 |                     node.is_leaf = True
236 | 
237 |                     # ----------------------------------------------------------------
238 |                     # Something about obj/mse? if that is added
239 |                     #
240 |                     # - can do a self function so that tree references itself/it's own type of node?
241 |                     # ----------------------------------------------------------------
242 |                     if tb.node_depth == self.tree_depth:
243 |                         self.tree_depth = self.tree_depth - 1
244 | 
245 |         _prune(self.root)
246 | 
247 |     def get_triggers(self, x):
248 |         pass
249 | 
250 |     def save(self, filename):
251 |         import pickle as pkl
252 | 
253 |         check_dir(filename)
254 |         with open(filename, "wb") as file:
255 |             pkl.dump(self, file)
256 | 
--------------------------------------------------------------------------------
/CTL/causal_tree/sig_diff/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/sig_diff/__init__.py
--------------------------------------------------------------------------------
/CTL/causal_tree/sig_diff/sig.py:
--------------------------------------------------------------------------------
1 | # from CTL.causal_tree.util import *
2 | try:
3 |     from CTL.causal_tree.util_c import *
4 | except:
5 |     from CTL.causal_tree.util import *
6 | from CTL.causal_tree.ct import *
7 | import numpy as np
8 | from scipy.stats import ttest_ind_from_stats
9 | 
10 | 
11 | class SigNode(CTNode):
12 | 
13 |     def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1,
14 |                  is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0):
15 |         super().__init__()
16 |         # not tree specific features (most likely added at creation)
17 |         self.p_val = p_val
18 |         self.effect = effect
19 |         self.node_depth = node_depth
20 |         self.control_mean = control_mean
21 |         self.treatment_mean = treatment_mean
22 | 
23 |         # during tree building
24 |         self.obj = obj
25 |         self.num_samples = num_samples
26 | 
27 |         # after building tree
28 |         self.col = col
29 |         self.value = value
30 |         self.is_leaf = is_leaf
31 |         self.leaf_num = leaf_num
32 |         self.true_branch = None
33 |         self.false_branch = None
34 | 
35 |         # after calling functions
36 |         self.column_name = ""
37 |         self.decision = ""
38 | 
39 | 
40 | class SigTree(CausalTree):
41 | 
42 |     def __init__(self, alpha=0.05, max_depth=-1, min_size=2, seed=724, max_values=None, verbose=False):
43 |         super().__init__()
44 |         self.alpha = alpha
45 |         self.max_depth = max_depth
46 |         self.min_size = min_size
47 |         self.seed = seed
48 | 
49 |         self.max_values = max_values
50 |         self.verbose = verbose
51 | 
52 |         self.max_effect = 0.0
53 |         self.min_effect = 0.0
54 | 
55 |         self.features = None
56 | 
57 |         self.root = SigNode()
58 | 
59 |     @abstractmethod
60 |     def fit(self, x, y, t):
61 |         pass
62 | 
63 |     def _eval_util(self, train_y, train_t):
64 |         var_t, var_c = variance(train_y, train_t)
65 |         std = np.sqrt(var_t) + np.sqrt(var_c)
66 |         effect = ace(train_y, train_t)
67 | 
68 |         return effect, std
69 | 
70 |     def _eval(self, y_train1, t_train1, y_train2, t_train2):
71 | 
72 |         total1 = y_train1.shape[0]
73 |         total2 = y_train2.shape[0]
74 | 
75 | return_val = (1, 1) 76 | if total1 < 1 or total2 < 1: 77 | return return_val 78 | 79 | effect1, std1 = self._eval_util(y_train1, t_train1) 80 | effect2, std2 = self._eval_util(y_train2, t_train2) 81 | 82 | stat, p_val = ttest_ind_from_stats(effect1, std1, total1, effect2, std2, total2) 83 | return stat, p_val 84 | 85 | def predict(self, x): 86 | 87 | def _predict(node: SigNode, observation): 88 | if node.is_leaf: 89 | return node.effect 90 | else: 91 | v = observation[node.col] 92 | if v >= node.value: 93 | branch = node.true_branch 94 | else: 95 | branch = node.false_branch 96 | 97 | return _predict(branch, observation) 98 | 99 | if len(x.shape) == 1: 100 | prediction = _predict(self.root, x) 101 | return prediction 102 | 103 | num_test = x.shape[0] 104 | 105 | prediction = np.zeros(num_test) 106 | 107 | for i in range(num_test): 108 | test_example = x[i, :] 109 | prediction[i] = _predict(self.root, test_example) 110 | 111 | return prediction 112 | 113 | def get_groups(self, x): 114 | 115 | def _get_group(node: SigNode, observation): 116 | if node.is_leaf: 117 | return node.leaf_num 118 | else: 119 | v = observation[node.col] 120 | if v >= node.value: 121 | branch = node.true_branch 122 | else: 123 | branch = node.false_branch 124 | 125 | return _get_group(branch, observation) 126 | 127 | if len(x.shape) == 1: 128 | return _get_group(self.root, x) 129 | num_test = x.shape[0] 130 | leaf_results = np.zeros(num_test) 131 | 132 | for i in range(num_test): 133 | test_example = x[i, :] 134 | leaf_results[i] = _get_group(self.root, test_example) 135 | 136 | return leaf_results 137 | 138 | def get_features(self, x): 139 | 140 | def _get_features(node: SigNode, observation, features): 141 | if node.is_leaf: 142 | return features 143 | else: 144 | v = observation[node.col] 145 | if v >= node.value: 146 | branch = node.true_branch 147 | else: 148 | branch = node.false_branch 149 | 150 | features.append(node.decision) 151 | return _get_features(branch, observation, features) 152 | 153 | if len(x.shape) == 1: 154 | features = [] 155 | return _get_features(self.root, x, features) 156 | num_test = x.shape[0] 157 | leaf_features = [] 158 | 159 | for i in range(num_test): 160 | features = [] 161 | test_example = x[i, :] 162 | leaf_features.append(_get_features(self.root, test_example, features)) 163 | 164 | return leaf_features 165 | 166 | def prune(self, alpha=0.05): 167 | 168 | def _prune(node: SigNode): 169 | if node.true_branch is None or node.false_branch is None: 170 | return 171 | 172 | # recursive call for each branch 173 | if not node.true_branch.is_leaf: 174 | _prune(node.true_branch) 175 | if not node.false_branch.is_leaf: 176 | _prune(node.false_branch) 177 | 178 | # merge leaves (potentially) 179 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 180 | # Get branches 181 | tb = node.true_branch 182 | fb = node.false_branch 183 | 184 | tb_pval = tb.p_val 185 | fb_pval = fb.p_val 186 | 187 | if tb_pval > alpha and fb_pval > alpha: 188 | node.leaf_num = node.true_branch.leaf_num 189 | node.true_branch = None 190 | node.false_branch = None 191 | self.num_leaves = self.num_leaves - 1 192 | node.is_leaf = True 193 | 194 | # ---------------------------------------------------------------- 195 | # Something about obj/mse? if that is added 196 | # 197 | # - can do a self function so that tree references itself/it's own type of node? 
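
The `_eval`/`_eval_util` pair above turns each candidate split into a two-sample t-test on the children's effect estimates via `ttest_ind_from_stats`. A hedged, self-contained sketch of that criterion on synthetic data (the data-generating process is made up; treatment is independent of outcome here, so the split should be rejected):

```python
import numpy as np
from scipy.stats import ttest_ind_from_stats

rng = np.random.default_rng(0)
y1, t1 = rng.normal(2.0, 1.0, 200), rng.integers(0, 2, 200)
y2, t2 = rng.normal(0.0, 1.0, 200), rng.integers(0, 2, 200)

def effect_and_std(y, t):
    # same quantities as _eval_util: ATE and the summed per-arm stds
    yt, yc = y[t == 1], y[t == 0]
    return yt.mean() - yc.mean(), yt.std() + yc.std()

e1, s1 = effect_and_std(y1, t1)
e2, s2 = effect_and_std(y2, t2)
stat, p = ttest_ind_from_stats(e1, s1, len(y1), e2, s2, len(y2))
print(p <= 0.05)  # False here: both child effects are ~0, no significant difference
```
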
198 | # ---------------------------------------------------------------- 199 | if tb.node_depth == self.tree_depth: 200 | self.tree_depth = self.tree_depth - 1 201 | 202 | _prune(self.root) 203 | 204 | def get_triggers(self, x): 205 | pass 206 | 207 | def save(self, filename): 208 | import pickle as pkl 209 | 210 | check_dir(filename) 211 | with open(filename, "wb") as file: 212 | pkl.dump(self, file) -------------------------------------------------------------------------------- /CTL/causal_tree/sig_diff/sig_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.sig_diff.sig import * 2 | 3 | 4 | class BaseCausalTreeLearnNode(SigNode): 5 | 6 | def __init__(self, **kwargs): 7 | super().__init__(**kwargs) 8 | 9 | 10 | class SigTreeBase(SigTree): 11 | 12 | def __init__(self, **kwargs): 13 | super().__init__(**kwargs) 14 | self.root = BaseCausalTreeLearnNode() 15 | 16 | def fit(self, x, y, t): 17 | if x.shape[0] == 0: 18 | return 0 19 | 20 | # ---------------------------------------------------------------- 21 | # Seed 22 | # ---------------------------------------------------------------- 23 | np.random.seed(self.seed) 24 | 25 | train_x, train_y, train_t = x, y, t 26 | self.root.num_samples = train_y.shape[0] 27 | # ---------------------------------------------------------------- 28 | # effect and pvals 29 | # ---------------------------------------------------------------- 30 | effect = tau_squared(y, t) 31 | p_val = get_pval(y, t) 32 | self.root.effect = effect 33 | self.root.p_val = p_val 34 | 35 | self.root.obj = 0 36 | 37 | # ---------------------------------------------------------------- 38 | # Add control/treatment means 39 | # ---------------------------------------------------------------- 40 | self.root.control_mean = np.mean(y[t == 0]) 41 | self.root.treatment_mean = np.mean(y[t == 1]) 42 | 43 | self.root.num_samples = x.shape[0] 44 | 45 | self._fit(self.root, train_x, train_y, train_t) 46 | 47 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t): 48 | 49 | if train_x.shape[0] == 0: 50 | return node 51 | 52 | if node.node_depth > self.tree_depth: 53 | self.tree_depth = node.node_depth 54 | 55 | if self.max_depth == self.tree_depth: 56 | if node.effect > self.max_effect: 57 | self.max_effect = node.effect 58 | if node.effect < self.min_effect: 59 | self.min_effect = node.effect 60 | self.num_leaves += 1 61 | node.leaf_num = self.num_leaves 62 | node.is_leaf = True 63 | return node 64 | 65 | best_gain = 1.0 66 | best_attributes = [] 67 | best_tb_obj, best_fb_obj = (0.0, 0.0) 68 | 69 | column_count = train_x.shape[1] 70 | for col in range(0, column_count): 71 | unique_vals = np.unique(train_x[:, col]) 72 | 73 | if self.max_values is not None: 74 | if self.max_values < 1: 75 | idx = np.round(np.linspace( 76 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 77 | unique_vals = unique_vals[idx] 78 | else: 79 | idx = np.round(np.linspace( 80 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 81 | unique_vals = unique_vals[idx] 82 | 83 | for value in unique_vals: 84 | 85 | # check training data size 86 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 87 | = divide_set(train_x, train_y, train_t, col, value) 88 | check1 = check_min_size(self.min_size, train_t1) 89 | check2 = check_min_size(self.min_size, train_t2) 90 | if check1 or check2: 91 | continue 92 | 93 | t_stat, diff_pval = self._eval(train_y1, train_t1, train_y2, train_t2) 94 | 95 | gain = diff_pval 96 
| 97 | if gain < best_gain and gain <= self.alpha: 98 | best_gain = gain 99 | best_attributes = [col, value] 100 | 101 | if best_gain <= self.alpha: 102 | node.col = best_attributes[0] 103 | node.value = best_attributes[1] 104 | 105 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 106 | = divide_set(train_x, train_y, train_t, node.col, node.value) 107 | 108 | y1 = train_y1 109 | y2 = train_y2 110 | t1 = train_t1 111 | t2 = train_t2 112 | 113 | best_tb_effect = ace(y1, t1) 114 | best_fb_effect = ace(y2, t2) 115 | tb_p_val = get_pval(y1, t1) 116 | fb_p_val = get_pval(y2, t2) 117 | 118 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 119 | 120 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 121 | node_depth=node.node_depth + 1, 122 | num_samples=y1.shape[0]) 123 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 124 | node_depth=node.node_depth + 1, 125 | num_samples=y2.shape[0]) 126 | 127 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 128 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 129 | 130 | if node.effect > self.max_effect: 131 | self.max_effect = node.effect 132 | if node.effect < self.min_effect: 133 | self.min_effect = node.effect 134 | 135 | return node 136 | 137 | else: 138 | if node.effect > self.max_effect: 139 | self.max_effect = node.effect 140 | if node.effect < self.min_effect: 141 | self.min_effect = node.effect 142 | 143 | self.num_leaves += 1 144 | node.leaf_num = self.num_leaves 145 | node.is_leaf = True 146 | return node 147 | -------------------------------------------------------------------------------- /CTL/causal_tree/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import numpy as np 4 | from scipy.stats import ttest_ind 5 | import subprocess 6 | import time 7 | 8 | 9 | def check_dir(path): 10 | if not os.path.exists(os.path.dirname(path)): 11 | try: 12 | os.makedirs(os.path.dirname(path)) 13 | except OSError as exc: 14 | if exc.errno != errno.EEXIST: 15 | raise 16 | 17 | 18 | def divide_set(x, y, t, col, value): 19 | idx1 = x[:, col] >= value 20 | idx2 = ~idx1 21 | 22 | x1 = x[idx1] 23 | x2 = x[idx2] 24 | 25 | y1 = y[idx1] 26 | y2 = y[idx2] 27 | 28 | t1 = t[idx1] 29 | t2 = t[idx2] 30 | 31 | return x1, x2, y1, y2, t1, t2 32 | 33 | 34 | def tau_squared(y, t): 35 | total = y.shape[0] 36 | 37 | return_val = (-np.inf, -np.inf) 38 | 39 | if total == 0: 40 | return return_val 41 | 42 | treat_vect = t 43 | 44 | effect = ace(y, treat_vect) 45 | err = (effect ** 2) * total 46 | 47 | return effect 48 | 49 | 50 | def tau_squared_trigger(outcome, treatment, min_size=1, quartile=False): 51 | """Continuous case""" 52 | total = outcome.shape[0] 53 | 54 | return_val = (-np.inf, -np.inf) 55 | 56 | if total == 0: 57 | return return_val 58 | 59 | unique_treatment = np.unique(treatment) 60 | 61 | if unique_treatment.shape[0] == 1: 62 | return return_val 63 | 64 | unique_treatment = (unique_treatment[1:] + unique_treatment[:-1]) / 2 65 | unique_treatment = unique_treatment[1:-1] 66 | 67 | if quartile: 68 | first_quartile = int(np.floor(unique_treatment.shape[0] / 4)) 69 | third_quartile = int(np.ceil(3 * unique_treatment.shape[0] / 4)) 70 | 71 | unique_treatment = unique_treatment[first_quartile:third_quartile] 72 | 73 | yy = np.tile(outcome, (unique_treatment.shape[0], 1)) 74 | tt = np.tile(treatment, (unique_treatment.shape[0], 1)) 75 | 76 | x = 
np.transpose(np.transpose(tt) > unique_treatment) 77 | 78 | tt[x] = 1 79 | tt[np.logical_not(x)] = 0 80 | 81 | treat_num = np.sum(tt == 1, axis=1) 82 | cont_num = np.sum(tt == 0, axis=1) 83 | min_size_idx = np.where(np.logical_and( 84 | treat_num >= min_size, cont_num >= min_size)) 85 | 86 | unique_treatment = unique_treatment[min_size_idx] 87 | tt = tt[min_size_idx] 88 | yy = yy[min_size_idx] 89 | 90 | if tt.shape[0] == 0: 91 | return return_val 92 | 93 | y_t_m = np.sum((yy * (tt == 1)), axis=1) / np.sum(tt == 1, axis=1) 94 | y_c_m = np.sum((yy * (tt == 0)), axis=1) / np.sum(tt == 0, axis=1) 95 | 96 | effect = y_t_m - y_c_m 97 | err = effect ** 2 98 | 99 | max_err = np.argmax(err) 100 | 101 | best_effect = effect[max_err] 102 | best_err = err[max_err] 103 | best_split = unique_treatment[max_err] 104 | 105 | best_err = total * best_err 106 | 107 | return best_effect, best_split 108 | 109 | 110 | def ace(y, t): 111 | treat = t >= 0.5 112 | # control = t == 0 113 | control = ~treat 114 | 115 | yt = y[treat] 116 | yc = y[control] 117 | 118 | mu1 = 0.0 119 | mu0 = 0.0 120 | if yt.shape[0] != 0: 121 | mu1 = np.mean(yt) 122 | if yc.shape[0] != 0: 123 | mu0 = np.mean(yc) 124 | 125 | return mu1 - mu0 126 | 127 | 128 | def ace_trigger(y, t, trigger): 129 | treat = t >= trigger 130 | control = ~treat 131 | 132 | yt = y[treat] 133 | yc = y[control] 134 | 135 | mu1 = 0.0 136 | mu0 = 0.0 137 | if yt.shape[0] != 0: 138 | mu1 = np.mean(yt) 139 | if yc.shape[0] != 0: 140 | mu0 = np.mean(yc) 141 | 142 | return mu1 - mu0 143 | 144 | 145 | def get_pval(y, t): 146 | treat = t == 1 147 | # control = t == 0 148 | control = ~treat 149 | 150 | outcome_cont = y[treat] 151 | outcome_trt = y[control] 152 | 153 | p_val = ttest_ind(outcome_cont, outcome_trt)[1] 154 | 155 | if np.isnan(p_val): 156 | return 0.000 157 | 158 | return p_val 159 | 160 | 161 | def get_pval_trigger(y, t, trigger): 162 | treat = t >= trigger 163 | control = ~treat 164 | 165 | outcome_cont = y[treat] 166 | outcome_trt = y[control] 167 | 168 | p_val = ttest_ind(outcome_cont, outcome_trt)[1] 169 | 170 | if np.isnan(p_val): 171 | return 0.000 172 | 173 | return p_val 174 | 175 | 176 | def min_size_value_bool(min_size, t, trigger=0.5): 177 | nt, nc = get_treat_size(t, trigger=trigger) 178 | 179 | return nt, nc, nt < min_size or nc < min_size 180 | 181 | 182 | def check_min_size(min_size, t, trigger=0.5): 183 | nt, nc = get_treat_size(t, trigger) 184 | 185 | return nt < min_size or nc < min_size 186 | 187 | 188 | def get_treat_size(t, trigger=0.5): 189 | treated = t >= trigger 190 | control = ~treated 191 | num_treatment = t[treated].shape[0] 192 | num_control = t[control].shape[0] 193 | 194 | return num_treatment, num_control 195 | 196 | 197 | def variance(y, t): 198 | treat_vect = t 199 | 200 | treat = treat_vect == 1 201 | # control = treat_vect == 0 202 | control = ~treat 203 | 204 | if y.shape[0] == 0: 205 | return np.array([np.inf, np.inf]) 206 | 207 | yt = y[treat] 208 | yc = y[control] 209 | 210 | if yt.shape[0] == 0: 211 | var_t = np.var(y) 212 | else: 213 | var_t = np.var(yt) 214 | 215 | if yc.shape[0] == 0: 216 | var_c = np.var(y) 217 | else: 218 | var_c = np.var(yc) 219 | 220 | return var_t, var_c 221 | 222 | 223 | def variance_trigger(y, t, trigger): 224 | treat_vect = t 225 | 226 | treat = treat_vect >= trigger 227 | # control = treat_vect == 0 228 | control = ~treat 229 | 230 | if y.shape[0] == 0: 231 | return np.array([np.inf, np.inf]) 232 | 233 | yt = y[treat] 234 | yc = y[control] 235 | 236 | if yt.shape[0] == 0: 237 | var_t = 
np.var(y) 238 | else: 239 | var_t = np.var(yt) 240 | 241 | if yc.shape[0] == 0: 242 | var_c = np.var(y) 243 | else: 244 | var_c = np.var(yc) 245 | 246 | return var_t, var_c 247 | 248 | 249 | def col_dict(names): 250 | feat_names = {} 251 | for i, name in enumerate(names): 252 | column = "Column %s" % i 253 | feat_names[column] = name 254 | return feat_names 255 | -------------------------------------------------------------------------------- /CTL/causal_tree/util_c.cpython-37m-darwin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/CTL/causal_tree/util_c.cpython-37m-darwin.so -------------------------------------------------------------------------------- /CTL/pehe_tree.py: -------------------------------------------------------------------------------- 1 | from CTL._tree import _CausalTree 2 | from CTL.causal_tree.nn_pehe.base import * 3 | from CTL.causal_tree.nn_pehe.val import * 4 | from CTL.causal_tree.nn_pehe.honest import * 5 | from CTL.causal_tree.nn_pehe.balance_split import * 6 | 7 | 8 | class PEHETree(_CausalTree): 9 | 10 | def __init__(self, min_size=2, max_depth=-1, k=1, 11 | val=False, split_size=0.5, 12 | honest=False, 13 | use_propensity=False, propensity_model=None, 14 | balance=False, 15 | seed=724): 16 | super().__init__() 17 | 18 | params = { 19 | "min_size": min_size, 20 | "max_depth": max_depth, 21 | "k": k, 22 | "seed": seed, 23 | "split_size": split_size, 24 | "use_propensity": use_propensity, 25 | "propensity_model": propensity_model 26 | } 27 | if val: 28 | self.tree = ValPEHE(**params) 29 | elif honest: 30 | self.tree = HonestPEHE(**params) 31 | elif balance: 32 | self.tree = BalanceBasePEHE(**params) 33 | else: 34 | self.tree = BasePEHE(**params) 35 | 36 | self.column_num = 0 37 | self.fitted = False 38 | self.tree_depth = 0 39 | 40 | self.obj = 0 41 | self.pehe = 0 42 | 43 | def fit(self, x, y, t): 44 | self.column_num = x.shape[1] 45 | x = x.astype(np.float64) 46 | y = y.astype(np.float64) 47 | t = t.astype(np.float64) 48 | self.tree.fit(x, y, t) 49 | self.fitted = True 50 | self.tree_depth = self.tree.tree_depth 51 | self.obj = self.tree.obj 52 | self.pehe = self.tree.pehe 53 | -------------------------------------------------------------------------------- /CTL/sig_diff_tree.py: -------------------------------------------------------------------------------- 1 | from CTL._tree import _CausalTree 2 | from CTL.causal_tree.sig_diff.sig_base import SigTreeBase 3 | from CTL.causal_tree.sig_diff.sig_val import SigTreeVal 4 | import numpy as np 5 | 6 | 7 | class SigDiffTree(_CausalTree): 8 | 9 | def __init__(self, alpha=0.05, min_size=2, max_depth=-1, val=False, split_size=0.5, seed=724): 10 | super().__init__() 11 | 12 | params = { 13 | "alpha": alpha, 14 | "min_size": min_size, 15 | "max_depth": max_depth, 16 | "seed": seed, 17 | } 18 | if val: 19 | params["split_size"] = split_size 20 | self.tree = SigTreeVal(**params) 21 | else: 22 | self.tree = SigTreeBase(**params) 23 | 24 | self.column_num = 0 25 | self.fitted = False 26 | self.tree_depth = 0 27 | 28 | self.obj = 0 29 | 30 | def fit(self, x, y, t): 31 | self.column_num = x.shape[1] 32 | x = x.astype(np.float64) 33 | y = y.astype(np.float64) 34 | t = t.astype(np.float64) 35 | self.tree.fit(x, y, t) 36 | self.fitted = True 37 | self.tree_depth = self.tree.tree_depth 38 | self.obj = self.tree.obj 39 | -------------------------------------------------------------------------------- 
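
An end-to-end sketch of the significance-based tree on synthetic data (the data-generating process below is invented for illustration; the class and its `fit`/`predict` are as defined in `CTL/causal_tree/sig_diff/sig_base.py` above):

```python
import numpy as np
from CTL.causal_tree.sig_diff.sig_base import SigTreeBase

rng = np.random.default_rng(724)
x = rng.normal(size=(500, 4))
t = rng.integers(0, 2, size=500).astype(float)
# treatment helps only where feature 0 is positive (made-up effect)
y = x[:, 0] + t * (x[:, 0] > 0) + rng.normal(scale=0.1, size=500)

tree = SigTreeBase(alpha=0.05, min_size=5, max_depth=3)
tree.fit(x, y, t)
print(tree.tree_depth, tree.num_leaves)
print(tree.predict(x[:5]))  # per-row effect estimates from the fitted leaves
```
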
/CTL/tree.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class Node(ABC): 5 | 6 | def __init__(self): 7 | self.is_leaf = False 8 | 9 | 10 | class Tree(ABC): 11 | 12 | def __init__(self): 13 | pass 14 | 15 | @abstractmethod 16 | def fit(self, x, y, t): 17 | pass 18 | 19 | @abstractmethod 20 | def predict(self, x): 21 | pass 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CTL 2 | 3 | Christopher Tran, Elena Zheleva, ["Learning Triggers for Heterogeneous Treatment Effects", AAAI 2019.](https://arxiv.org/pdf/1902.00087.pdf) 4 | 5 | Our method is based on and adapted from: https://github.com/susanathey/causalTree 6 | 7 | 8 | ## Requirements 9 | * Python 3 10 | * sklearn 11 | * scipy 12 | * graphviz (if you want to plot the tree) 13 | 14 | ## Installation 15 | 16 | through pip 17 | 18 | ```bash 19 | pip install causal_tree_learn 20 | ``` 21 | 22 | or clone the repository 23 | ```bash 24 | python setup.py build_ext --inplace 25 | ``` 26 | 27 | ## Demo Code 28 | 29 | Two demo codes are available to run. 
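
For instance, a minimal usage sketch on made-up data (the calls mirror `binary_example.py` below):

```python
import numpy as np
from CTL.causal_tree_learn import CausalTree

x = np.random.normal(size=(100, 3))       # covariates
t = np.random.binomial(1, 0.5, size=100)  # binary treatment
y = np.random.normal(size=100)            # outcome

ctl = CausalTree()
ctl.fit(x, y, t)
ctl.prune()
effects = ctl.predict(x)                  # estimated treatment effect per row
```
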
30 | 31 | ```bash 32 | python binary_example.py 33 | ``` 34 | Runs the tree on a binary example (asthma.txt) 35 | 36 | ```bash 37 | python trigger_example.py 38 | ``` 39 | Runs a tree on a trigger problem where the treatment is continuous (note for now the example is made up and treatment does not affect outcome, this is only to show example code) 40 | -------------------------------------------------------------------------------- /binary_example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from CTL.causal_tree_learn import CausalTree 3 | from sklearn.model_selection import train_test_split 4 | import numpy as np 5 | 6 | asthma = pd.read_csv('data/asthma.txt', delimiter=' ', index_col=None) 7 | 8 | asthma.columns = ['physician', 'age', 'sex', 'education', 'insurance', 'drug coverage', 'severity', 9 | 'comorbidity', 'physical comorbidity', 'mental comorbidity', 'satisfaction'] 10 | 11 | y = asthma['satisfaction'].values 12 | treatment = asthma['physician'].values 13 | 14 | x = asthma.drop(['satisfaction', 'physician'], axis=1).values 15 | 16 | columns = asthma.drop(['satisfaction', 'physician'], axis=1).columns 17 | 18 | y[y == 0] = -1 19 | 20 | treatment[treatment == 1] = 0 21 | treatment[treatment == 2] = 1 22 | 23 | np.random.seed(0) 24 | 25 | 26 | x_train, x_test, y_train, y_test, treat_train, treat_test = train_test_split(x, y, treatment, 27 | test_size=0.5, random_state=42) 28 | 29 | # regular CTL 30 | ctl = CausalTree(magnitude=False) 31 | ctl.fit(x_train, y_train, treat_train) 32 | ctl.prune() 33 | ctl_predict = ctl.predict(x_test) 34 | 35 | # honest CTL (CT-HL) 36 | cthl = CausalTree(honest=True) 37 | cthl.fit(x_train, y_train, treat_train) 38 | cthl.prune() 39 | cthl_predict = cthl.predict(x_test) 40 | 41 | # val honest CTL (CT-HV) 42 | cthv = CausalTree(val_honest=True) 43 | cthv.fit(x_train, y_train, treat_train) 44 | cthv.prune() 45 | cthv_predict = cthv.predict(x_test) 46 | 47 | # adaptive CT (Athey and Imbens, PNAS 2016) 48 | ct_adaptive = CausalTree(weight=0.0, split_size=0.0) 49 | ct_adaptive.fit(x_train, y_train, treat_train) 50 | ct_adaptive.prune() 51 | ct_adaptive_predict = cthv.predict(x_test) 52 | 53 | # honest CT (Athey and Imbens, PNAS 2016) 54 | ct_honest = CausalTree(honest=True, weight=0.0, split_size=0.0) 55 | ct_honest.fit(x_train, y_train, treat_train) 56 | ct_honest.prune() 57 | ct_honest_predict = ct_honest.predict(x_test) 58 | 59 | ct_adaptive.plot_tree(features=columns, filename="output/bin_tree_adaptive", show_effect=True) 60 | ct_honest.plot_tree(features=columns, filename="output/bin_tree_honest", show_effect=True) 61 | ctl.plot_tree(features=columns, filename="output/bin_tree", show_effect=True) 62 | cthl.plot_tree(features=columns, filename="output/bin_tree_honest_learn", show_effect=True) 63 | cthv.plot_tree(features=columns, filename="output/bin_tree_honest_validation", show_effect=True) -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_learn_forest.py: -------------------------------------------------------------------------------- 1 | from 
CTL.causal_tree_learn import CausalTree 2 | import numpy as np 3 | 4 | 5 | class CausalTreeLearnForest: 6 | 7 | def __init__(self, num_trees=10, bootstrap=True, max_samples=None, max_features="auto", max_depth=-1, 8 | val_honest=False, honest=False, min_size=2, split_size=0.5, weight=0.5, feature_batch_size=None, 9 | seed=724): 10 | 11 | tree_params = { 12 | "weight": weight, 13 | "split_size": split_size, 14 | "max_depth": max_depth, 15 | "seed": seed, 16 | "min_size": min_size, 17 | "val_honest": val_honest, 18 | "honest": honest, 19 | "feature_batch_size": feature_batch_size, 20 | } 21 | 22 | self.num_trees = num_trees 23 | self.bootstrap = bootstrap 24 | self.max_samples = max_samples 25 | self.max_features = max_features 26 | self.max_depth = max_depth 27 | 28 | self.trees = tuple(CausalTree(**tree_params) for i in range(num_trees)) 29 | 30 | def fit(self, x, y, t): 31 | x = x.astype(float) 32 | y = y.astype(float) 33 | t = t.astype(float) 34 | 35 | for tree in self.trees: 36 | example_samples, feature_samples = self._sample(x) 37 | 38 | sample_x = x[np.ix_(example_samples, feature_samples)] 39 | sample_y = y[example_samples] 40 | sample_t = t[example_samples] 41 | 42 | tree.fit(sample_x, sample_y, sample_t) 43 | 44 | def predict(self, x): 45 | predictions = np.zeros((self.num_trees, x.shape[0])) 46 | for i, tree in enumerate(self.trees): 47 | predictions[i] = tree.predict(x) 48 | 49 | return np.mean(predictions, axis=0) 50 | 51 | def _sample(self, x): 52 | total_examples = x.shape[0] 53 | total_features = x.shape[1] 54 | 55 | example_samples = self._sample_examples(total_examples) 56 | feature_samples = self._feature_sample(total_features) 57 | 58 | return example_samples, feature_samples 59 | 60 | def _sample_examples(self, total_examples): 61 | if self.bootstrap: 62 | if self.max_samples: 63 | if isinstance(self.max_samples, float): 64 | example_samples = np.random.choice(np.arange(0, total_examples), 65 | size=int(self.max_samples * total_examples)) 66 | elif isinstance(self.max_samples, int): 67 | example_samples = np.random.choice(np.arange(0, total_examples), size=self.max_samples) 68 | else: 69 | example_samples = np.random.choice(np.arange(0, total_examples), size=total_examples) 70 | else: 71 | example_samples = np.random.choice(np.arange(0, total_examples), size=total_examples) 72 | else: 73 | example_samples = np.arange(0, total_examples) 74 | 75 | return example_samples 76 | 77 | def _feature_sample(self, total_features): 78 | num_features = self._feature_sample_size(total_features) 79 | feature_samples = np.random.permutation(total_features)[:num_features] 80 | return feature_samples 81 | 82 | def _feature_sample_size(self, total_features): 83 | num_features = total_features 84 | if self.max_features == "auto" or self.max_features == "sqrt": 85 | num_features = int(np.sqrt(num_features)) 86 | elif isinstance(self.max_features, int): 87 | num_features = self.max_features 88 | elif isinstance(self.max_features, float): 89 | num_features = int(self.max_features * total_features) 90 | return num_features 91 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/__init__.py -------------------------------------------------------------------------------- 
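
The forest draws a bootstrap sample of rows and a random subset of columns for every tree (`_sample_examples` / `_feature_sample` above). A small standalone sketch of that sampling scheme with made-up sizes:

```python
import numpy as np

rng = np.random.default_rng(724)
n, d = 100, 16
x = rng.normal(size=(n, d))

rows = rng.choice(n, size=n, replace=True)   # bootstrap: n rows with replacement
cols = rng.permutation(d)[:int(np.sqrt(d))]  # "auto"/"sqrt": keep sqrt(d) features
sub_x = x[np.ix_(rows, cols)]                # same indexing the forest's fit uses
print(sub_x.shape)                           # (100, 4)
```
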
/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ct.py: -------------------------------------------------------------------------------- 1 | from CTL.tree import * 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class CTNode(ABC): 6 | 7 | def __init__(self): 8 | super().__init__() 9 | 10 | 11 | class CausalTree(ABC): 12 | 13 | def __init__(self): 14 | super().__init__() 15 | 16 | # the learning objective 17 | self.obj = 0.0 18 | # Haven't implemented "mse" yet 19 | self.mse = 0.0 20 | 21 | # tree properties 22 | self.tree_depth = 0 23 | self.num_leaves = 0 24 | 25 | @abstractmethod 26 | def fit(self, x, y, t): 27 | pass 28 | 29 | @abstractmethod 30 | def predict(self, x): 31 | pass 32 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl/adaptive.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class AdaptiveNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class AdaptiveTree(CTLearn): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = AdaptiveNode() 21 | 22 | def adaptive_eval(self, train_y, train_t): 23 | total_train = train_y.shape[0] 24 | 25 | train_effect = ace(train_y, train_t) 26 | 27 | train_mse = total_train * (train_effect ** 2) 28 | 29 | obj = train_mse 30 | mse = total_train * (train_effect ** 2) 31 | 32 | return obj, mse 33 | 34 | def fit(self, x, y, t): 35 | if x.shape[0] == 0: 36 | return 0 37 | 38 | # ---------------------------------------------------------------- 39 | # Seed 40 | # ---------------------------------------------------------------- 41 | np.random.seed(self.seed) 42 | 43 | # ---------------------------------------------------------------- 44 | # Verbosity? 
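
`adaptive_eval` above scores a node as n * (ATE)^2, the adaptive criterion of Athey and Imbens. A tiny worked example with made-up numbers:

```python
import numpy as np

y = np.array([3.0, 1.0, 2.0, 0.0])
t = np.array([1, 1, 0, 0])

ate = y[t == 1].mean() - y[t == 0].mean()  # (3+1)/2 - (2+0)/2 = 1.0
obj = y.shape[0] * ate ** 2                # 4 * 1.0**2 = 4.0, the node's objective
print(obj)
```
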
45 | # ---------------------------------------------------------------- 46 | 47 | # ---------------------------------------------------------------- 48 | # Split data 49 | # ---------------------------------------------------------------- 50 | 51 | self.root.num_samples = y.shape[0] 52 | # ---------------------------------------------------------------- 53 | # effect and pvals 54 | # ---------------------------------------------------------------- 55 | effect = tau_squared(y, t) 56 | p_val = get_pval(y, t) 57 | self.root.effect = effect 58 | self.root.p_val = p_val 59 | 60 | # ---------------------------------------------------------------- 61 | # Not sure if i should eval in root or not 62 | # ---------------------------------------------------------------- 63 | node_eval, mse = self.adaptive_eval(y, t) 64 | self.root.obj = node_eval 65 | 66 | # ---------------------------------------------------------------- 67 | # Add control/treatment means 68 | # ---------------------------------------------------------------- 69 | self.root.control_mean = np.mean(y[t == 0]) 70 | self.root.treatment_mean = np.mean(y[t == 1]) 71 | 72 | self.root.num_samples = x.shape[0] 73 | 74 | self._fit(self.root, x, y, t) 75 | 76 | def _fit(self, node: AdaptiveNode, train_x, train_y, train_t): 77 | 78 | if train_x.shape[0] == 0: 79 | return node 80 | 81 | if node.node_depth > self.tree_depth: 82 | self.tree_depth = node.node_depth 83 | 84 | if self.max_depth == self.tree_depth: 85 | if node.effect > self.max_effect: 86 | self.max_effect = node.effect 87 | if node.effect < self.min_effect: 88 | self.min_effect = node.effect 89 | self.num_leaves += 1 90 | node.leaf_num = self.num_leaves 91 | node.is_leaf = True 92 | return node 93 | 94 | best_gain = 0.0 95 | best_attributes = [] 96 | best_tb_obj, best_fb_obj = (0.0, 0.0) 97 | 98 | column_count = train_x.shape[1] 99 | for col in range(0, column_count): 100 | unique_vals = np.unique(train_x[:, col]) 101 | 102 | if self.max_values is not None: 103 | if self.max_values < 1: 104 | idx = np.round(np.linspace( 105 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 106 | unique_vals = unique_vals[idx] 107 | else: 108 | idx = np.round(np.linspace( 109 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 110 | unique_vals = unique_vals[idx] 111 | 112 | for value in unique_vals: 113 | 114 | # check training data size 115 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 116 | = divide_set(train_x, train_y, train_t, col, value) 117 | check1 = check_min_size(self.min_size, train_t1) 118 | check2 = check_min_size(self.min_size, train_t2) 119 | if check1 or check2: 120 | continue 121 | 122 | tb_eval, tb_mse = self.adaptive_eval(train_y1, train_t1) 123 | fb_eval, fb_mse = self.adaptive_eval(train_y2, train_t2) 124 | 125 | split_eval = (tb_eval + fb_eval) 126 | gain = -node.obj + split_eval 127 | 128 | if gain > best_gain: 129 | best_gain = gain 130 | best_attributes = [col, value] 131 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 132 | 133 | if best_gain > 0: 134 | node.col = best_attributes[0] 135 | node.value = best_attributes[1] 136 | 137 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 138 | = divide_set(train_x, train_y, train_t, node.col, node.value) 139 | 140 | y1 = train_y1 141 | y2 = train_y2 142 | t1 = train_t1 143 | t2 = train_t2 144 | 145 | best_tb_effect = ace(y1, t1) 146 | best_fb_effect = ace(y2, t2) 147 | tb_p_val = get_pval(y1, t1) 148 | fb_p_val = get_pval(y2, t2) 149 | 150 | self.obj = self.obj - 
node.obj + best_tb_obj + best_fb_obj 151 | 152 | # ---------------------------------------------------------------- 153 | # Ignore "mse" here, come back to it later? 154 | # ---------------------------------------------------------------- 155 | 156 | tb = AdaptiveNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 157 | node_depth=node.node_depth + 1, 158 | num_samples=y1.shape[0]) 159 | fb = AdaptiveNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 160 | node_depth=node.node_depth + 1, 161 | num_samples=y2.shape[0]) 162 | 163 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 164 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 165 | 166 | if node.effect > self.max_effect: 167 | self.max_effect = node.effect 168 | if node.effect < self.min_effect: 169 | self.min_effect = node.effect 170 | 171 | return node 172 | 173 | else: 174 | if node.effect > self.max_effect: 175 | self.max_effect = node.effect 176 | if node.effect < self.min_effect: 177 | self.min_effect = node.effect 178 | 179 | self.num_leaves += 1 180 | node.leaf_num = self.num_leaves 181 | node.is_leaf = True 182 | return node 183 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_match/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_match/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_match/ctl_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_match.binary_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class BaseCausalTreeLearnNode(CTLearnNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class CTLMatchBase(CTLMatch): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = BaseCausalTreeLearnNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | # ---------------------------------------------------------------- 32 | # Verbosity? 
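
The CTL variants score splits on held-out data, so `fit` starts by carving a validation set out of the training data. A hedged sketch of that split on synthetic arrays (it mirrors the `train_test_split` call in `fit` just below):

```python
import numpy as np
from sklearn.model_selection import train_test_split

x = np.random.normal(size=(200, 5))
y = np.random.normal(size=200)
t = np.random.binomial(1, 0.5, size=200).astype(float)

# same unpacking order as the fit method: train/val for x, y, t in turn
train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(
    x, y, t, shuffle=True, test_size=0.5, random_state=724)
print(train_x.shape, val_x.shape)  # (100, 5) (100, 5)
```
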
33 | # ---------------------------------------------------------------- 34 | 35 | # ---------------------------------------------------------------- 36 | # Split data 37 | # ---------------------------------------------------------------- 38 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 39 | test_size=self.val_split) 40 | 41 | self.normalizer.fit(train_x) 42 | 43 | self.root.num_samples = y.shape[0] 44 | # ---------------------------------------------------------------- 45 | # effect and pvals 46 | # ---------------------------------------------------------------- 47 | effect = tau_squared(y, t) 48 | p_val = get_pval(y, t) 49 | self.root.effect = effect 50 | self.root.p_val = p_val 51 | 52 | # ---------------------------------------------------------------- 53 | # Not sure if i should eval in root or not 54 | # ---------------------------------------------------------------- 55 | node_eval, mse = self._eval(train_y, train_t, val_y, val_t) 56 | self.root.obj = node_eval 57 | 58 | # ---------------------------------------------------------------- 59 | # Add control/treatment means 60 | # ---------------------------------------------------------------- 61 | self.root.control_mean = np.mean(y[t == 0]) 62 | self.root.treatment_mean = np.mean(y[t == 1]) 63 | 64 | self.root.num_samples = x.shape[0] 65 | 66 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 67 | 68 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t, val_x, val_y, val_t): 69 | 70 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 71 | node.is_leaf = True 72 | return node 73 | 74 | if node.node_depth > self.tree_depth: 75 | self.tree_depth = node.node_depth 76 | 77 | if self.max_depth == self.tree_depth: 78 | self.num_leaves += 1 79 | node.leaf_num = self.num_leaves 80 | node.is_leaf = True 81 | return node 82 | 83 | best_gain = 0.0 84 | best_attributes = [] 85 | best_tb_obj, best_fb_obj = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | # ---------------------------------------------------------------- 92 | # TODO: Max values stuff 93 | # ---------------------------------------------------------------- 94 | 95 | # using the faster evaluation with vector/matrix calculations 96 | try: 97 | if self.feature_batch_size is None: 98 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, val_y, 99 | val_t, 100 | unique_vals, col) 101 | gain = -node.obj + split_obj 102 | if gain > best_gain: 103 | best_gain = gain 104 | best_attributes = [col, value] 105 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 106 | else: 107 | 108 | for x in batch(unique_vals, self.feature_batch_size): 109 | split_obj, upper_obj, lower_obj, value = self._eval_fast(train_x, train_y, train_t, val_x, 110 | val_y, val_t, x, col) 111 | 112 | gain = -node.obj + split_obj 113 | if gain > best_gain: 114 | best_gain = gain 115 | best_attributes = [col, value] 116 | best_tb_obj, best_fb_obj = (upper_obj, lower_obj) 117 | # if that fails (due to memory maybe?) 
then use the old calculation 118 | except: 119 | for value in unique_vals: 120 | 121 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 122 | = divide_set(val_x, val_y, val_t, col, value) 123 | 124 | # check validation set size 125 | val_size = self.val_split * self.min_size if self.val_split * self.min_size > 2 else 2 126 | if check_min_size(val_size, val_t1) or check_min_size(val_size, val_t2): 127 | continue 128 | 129 | # check training data size 130 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 131 | = divide_set(train_x, train_y, train_t, col, value) 132 | check1 = check_min_size(self.min_size, train_t1) 133 | check2 = check_min_size(self.min_size, train_t2) 134 | if check1 or check2: 135 | continue 136 | 137 | tb_eval, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 138 | fb_eval, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 139 | 140 | split_eval = (tb_eval + fb_eval) 141 | gain = -node.obj + split_eval 142 | 143 | if gain > best_gain: 144 | best_gain = gain 145 | best_attributes = [col, value] 146 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 147 | 148 | if best_gain > 0: 149 | node.col = best_attributes[0] 150 | node.value = best_attributes[1] 151 | 152 | # print(node.col) 153 | 154 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 155 | = divide_set(train_x, train_y, train_t, node.col, node.value) 156 | 157 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 158 | = divide_set(val_x, val_y, val_t, node.col, node.value) 159 | 160 | y1 = np.concatenate((train_y1, val_y1)) 161 | y2 = np.concatenate((train_y2, val_y2)) 162 | t1 = np.concatenate((train_t1, val_t1)) 163 | t2 = np.concatenate((train_t2, val_t2)) 164 | 165 | best_tb_effect = ace(y1, t1) 166 | best_fb_effect = ace(y2, t2) 167 | tb_p_val = get_pval(y1, t1) 168 | fb_p_val = get_pval(y2, t2) 169 | 170 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 171 | 172 | # ---------------------------------------------------------------- 173 | # Ignore "mse" here, come back to it later? 
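
All of these `_fit` methods rely on `divide_set` (from `CTL/causal_tree/util.py`), which sends rows with `x[:, col] >= value` to the true branch. A quick check of that convention with made-up numbers:

```python
import numpy as np
from CTL.causal_tree.util import divide_set

x = np.arange(6, dtype=float).reshape(6, 1)
y = np.arange(6, dtype=float)
t = np.array([0.0, 1.0, 0.0, 1.0, 0.0, 1.0])

x1, x2, y1, y2, t1, t2 = divide_set(x, y, t, 0, 3.0)
print(x1.ravel())  # [3. 4. 5.]  true branch: x[:, 0] >= 3
print(x2.ravel())  # [0. 1. 2.]  false branch: x[:, 0] < 3
```
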
174 | # ---------------------------------------------------------------- 175 | 176 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 177 | node_depth=node.node_depth + 1, 178 | num_samples=y1.shape[0]) 179 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 180 | node_depth=node.node_depth + 1, 181 | num_samples=y2.shape[0]) 182 | 183 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 184 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 185 | 186 | if node.effect > self.max_effect: 187 | self.max_effect = node.effect 188 | if node.effect < self.min_effect: 189 | self.min_effect = node.effect 190 | 191 | return node 192 | 193 | else: 194 | if node.effect > self.max_effect: 195 | self.max_effect = node.effect 196 | if node.effect < self.min_effect: 197 | self.min_effect = node.effect 198 | 199 | self.num_leaves += 1 200 | node.leaf_num = self.num_leaves 201 | node.is_leaf = True 202 | return node 203 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_trigger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_trigger/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_trigger/adaptive_trigger.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_trigger.trigger_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class AdaptiveTriggerNode(TriggerNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class AdaptiveTriggerTree(TriggerTree): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = AdaptiveTriggerNode() 21 | 22 | def adaptive_eval(self, train_y, train_t): 23 | 24 | total_train = train_y.shape[0] 25 | return_val = (-np.inf, -np.inf, -np.inf) 26 | 27 | if total_train == 0: 28 | return return_val 29 | 30 | train_effect, best_trigger = tau_squared_trigger(train_y, train_t, self.min_size, self.quartile) 31 | 32 | if train_effect <= -np.inf: 33 | return return_val 34 | 35 | train_err = train_effect ** 2 36 | 37 | train_mse = total_train * train_err 38 | obj = train_mse 39 | 40 | best_obj = obj 41 | best_mse = train_err 42 | 43 | return best_obj, best_trigger, best_mse 44 | 45 | def fit(self, x, y, t): 46 | if x.shape[0] == 0: 47 | return 0 48 | 49 | # ---------------------------------------------------------------- 50 | # Seed 51 | # ---------------------------------------------------------------- 52 | np.random.seed(self.seed) 53 | 54 | # ---------------------------------------------------------------- 55 | # Verbosity? 
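# ----------------------------------------------------------------
# Note: in the trigger variants the treatment is continuous and a unit
# counts as treated when t >= trigger. adaptive_eval() above therefore
# scores a node as n * tau^2, with tau the best thresholded effect found
# by tau_squared_trigger. A rough standalone equivalent, assuming numpy
# only and that both groups are non-empty (illustration, not used here):
#
#     def node_score_sketch(y, t, trigger):
#         treated = t >= trigger
#         tau = np.mean(y[treated]) - np.mean(y[~treated])
#         return y.shape[0] * tau ** 2
# ----------------------------------------------------------------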
56 | # ---------------------------------------------------------------- 57 | 58 | # ---------------------------------------------------------------- 59 | # Split data 60 | # ---------------------------------------------------------------- 61 | 62 | self.root.num_samples = y.shape[0] 63 | # ---------------------------------------------------------------- 64 | # effect and pvals 65 | # ---------------------------------------------------------------- 66 | effect, trigger = tau_squared_trigger(y, t, self.min_size, self.quartile) 67 | p_val = get_pval_trigger(y, t, trigger) 68 | self.root.effect = effect 69 | self.root.p_val = p_val 70 | self.root.trigger = trigger 71 | 72 | # ---------------------------------------------------------------- 73 | # Not sure if i should eval in root or not 74 | # ---------------------------------------------------------------- 75 | node_eval, trigger, mse = self.adaptive_eval(y, t) 76 | self.root.obj = node_eval 77 | 78 | # ---------------------------------------------------------------- 79 | # Add control/treatment means 80 | # ---------------------------------------------------------------- 81 | self.root.control_mean = np.mean(y[t >= trigger]) 82 | self.root.treatment_mean = np.mean(y[t < trigger]) 83 | 84 | self.root.num_samples = x.shape[0] 85 | 86 | self._fit(self.root, x, y, t) 87 | 88 | def _fit(self, node: AdaptiveTriggerNode, train_x, train_y, train_t): 89 | 90 | if train_x.shape[0] == 0: 91 | return node 92 | 93 | if node.node_depth > self.tree_depth: 94 | self.tree_depth = node.node_depth 95 | 96 | if self.max_depth == self.tree_depth: 97 | if node.effect > self.max_effect: 98 | self.max_effect = node.effect 99 | if node.effect < self.min_effect: 100 | self.min_effect = node.effect 101 | self.num_leaves += 1 102 | node.leaf_num = self.num_leaves 103 | node.is_leaf = True 104 | return node 105 | 106 | best_gain = 0.0 107 | best_attributes = [] 108 | best_tb_obj, best_fb_obj = (0.0, 0.0) 109 | best_tb_trigger, best_fb_trigger = (0.0, 0.0) 110 | 111 | column_count = train_x.shape[1] 112 | for col in range(0, column_count): 113 | unique_vals = np.unique(train_x[:, col]) 114 | 115 | if self.max_values is not None: 116 | if self.max_values < 1: 117 | idx = np.round(np.linspace(0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 118 | unique_vals = unique_vals[idx] 119 | else: 120 | idx = np.round(np.linspace( 121 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 122 | unique_vals = unique_vals[idx] 123 | 124 | for value in unique_vals: 125 | 126 | # check training data size 127 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 128 | = divide_set(train_x, train_y, train_t, col, value) 129 | check1 = check_min_size(self.min_size, train_t1) 130 | check2 = check_min_size(self.min_size, train_t2) 131 | if check1 or check2: 132 | continue 133 | 134 | tb_eval, tb_trigger, tb_mse = self.adaptive_eval(train_y1, train_t1) 135 | fb_eval, fb_trigger, fb_mse = self.adaptive_eval(train_y2, train_t2) 136 | 137 | split_eval = (tb_eval + fb_eval) 138 | gain = -node.obj + split_eval 139 | 140 | if gain > best_gain: 141 | best_gain = gain 142 | best_attributes = [col, value] 143 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 144 | best_tb_trigger, best_fb_trigger = (tb_trigger, fb_trigger) 145 | 146 | if best_gain > 0: 147 | node.col = best_attributes[0] 148 | node.value = best_attributes[1] 149 | 150 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 151 | = divide_set(train_x, train_y, train_t, node.col, node.value) 
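# ----------------------------------------------------------------
# Review note: the child effects computed just below use ace(), which
# thresholds the treatment at 0.5. The otherwise-parallel
# TriggerTreeBase._fit (ctl_base_trigger.py) uses the per-branch trigger
# instead, which is likely the intended behavior for a continuous
# treatment:
#
#     best_tb_effect = ace_trigger(y1, t1, best_tb_trigger)
#     best_fb_effect = ace_trigger(y2, t2, best_fb_trigger)
#
# Relatedly, fit() above assigns control_mean = np.mean(y[t >= trigger])
# and treatment_mean = np.mean(y[t < trigger]); since ace_trigger treats
# t >= trigger as the treated group, those two assignments appear to be
# swapped.
# ----------------------------------------------------------------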
152 | 153 | y1 = train_y1 154 | y2 = train_y2 155 | t1 = train_t1 156 | t2 = train_t2 157 | 158 | best_tb_effect = ace(y1, t1) 159 | best_fb_effect = ace(y2, t2) 160 | tb_p_val = get_pval(y1, t1) 161 | fb_p_val = get_pval(y2, t2) 162 | 163 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 164 | 165 | # ---------------------------------------------------------------- 166 | # Ignore "mse" here, come back to it later? 167 | # ---------------------------------------------------------------- 168 | 169 | tb = AdaptiveTriggerNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 170 | node_depth=node.node_depth + 1, 171 | num_samples=y1.shape[0], trigger=best_tb_trigger) 172 | fb = AdaptiveTriggerNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 173 | node_depth=node.node_depth + 1, 174 | num_samples=y2.shape[0], trigger=best_fb_trigger) 175 | 176 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 177 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 178 | 179 | if node.effect > self.max_effect: 180 | self.max_effect = node.effect 181 | if node.effect < self.min_effect: 182 | self.min_effect = node.effect 183 | 184 | return node 185 | 186 | else: 187 | if node.effect > self.max_effect: 188 | self.max_effect = node.effect 189 | if node.effect < self.min_effect: 190 | self.min_effect = node.effect 191 | 192 | self.num_leaves += 1 193 | node.leaf_num = self.num_leaves 194 | node.is_leaf = True 195 | return node 196 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/ctl_trigger/ctl_base_trigger.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.ctl_trigger.trigger_ctl import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class TriggerBaseNode(TriggerNode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | 11 | # ---------------------------------------------------------------- 12 | # Base causal tree (ctl, base objective) 13 | # ---------------------------------------------------------------- 14 | class TriggerTreeBase(TriggerTree): 15 | 16 | def __init__(self, **kwargs): 17 | super().__init__(**kwargs) 18 | self.root = TriggerBaseNode() 19 | 20 | def fit(self, x, y, t): 21 | if x.shape[0] == 0: 22 | return 0 23 | 24 | # ---------------------------------------------------------------- 25 | # Seed 26 | # ---------------------------------------------------------------- 27 | np.random.seed(self.seed) 28 | 29 | # ---------------------------------------------------------------- 30 | # Verbosity? 
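# ----------------------------------------------------------------
# Usage sketch for this class. The constructor arguments are assumed
# from the sibling tree classes in this package (PEHETree, SigTree);
# adjust to the actual TriggerTree signature if it differs:
#
#     import numpy as np
#     from CTL.causal_tree.ctl_trigger.ctl_base_trigger import TriggerTreeBase
#
#     rng = np.random.RandomState(0)
#     x = rng.randn(500, 5)
#     t = rng.rand(500)                    # continuous "treatment" dose
#     y = x[:, 0] + (t > 0.5) * x[:, 1] + 0.1 * rng.randn(500)
#
#     tree = TriggerTreeBase(min_size=5, max_depth=4)
#     tree.fit(x, y, t)
#     effects = tree.predict(x)            # per-example effect estimates
#     triggers = tree.get_triggers(x)      # per-example trigger thresholds
# ----------------------------------------------------------------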
31 | # ---------------------------------------------------------------- 32 | 33 | # ---------------------------------------------------------------- 34 | # Split data 35 | # ---------------------------------------------------------------- 36 | train_x, val_x, train_y, val_y, train_t, val_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 37 | test_size=self.val_split) 38 | self.root.num_samples = y.shape[0] 39 | # ---------------------------------------------------------------- 40 | # effect and pvals 41 | # ---------------------------------------------------------------- 42 | effect, trigger = tau_squared_trigger(y, t, self.min_size, self.quartile) 43 | p_val = get_pval_trigger(y, t, trigger) 44 | self.root.effect = effect 45 | self.root.p_val = p_val 46 | self.root.trigger = trigger 47 | 48 | # ---------------------------------------------------------------- 49 | # Not sure if i should eval in root or not 50 | # ---------------------------------------------------------------- 51 | node_eval, trigger, mse = self._eval(train_y, train_t, val_y, val_t) 52 | self.root.obj = node_eval 53 | 54 | # ---------------------------------------------------------------- 55 | # Add control/treatment means 56 | # ---------------------------------------------------------------- 57 | self.root.control_mean = np.mean(y[t >= trigger]) 58 | self.root.treatment_mean = np.mean(y[t < trigger]) 59 | 60 | self.root.num_samples = x.shape[0] 61 | 62 | self._fit(self.root, train_x, train_y, train_t, val_x, val_y, val_t) 63 | 64 | def _fit(self, node: TriggerBaseNode, train_x, train_y, train_t, val_x, val_y, val_t): 65 | 66 | if train_x.shape[0] == 0 or val_x.shape[0] == 0: 67 | return node 68 | 69 | if node.node_depth > self.tree_depth: 70 | self.tree_depth = node.node_depth 71 | 72 | if self.max_depth == self.tree_depth: 73 | if node.effect > self.max_effect: 74 | self.max_effect = node.effect 75 | if node.effect < self.min_effect: 76 | self.min_effect = node.effect 77 | self.num_leaves += 1 78 | node.leaf_num = self.num_leaves 79 | node.is_leaf = True 80 | return node 81 | 82 | best_gain = 0.0 83 | best_attributes = [] 84 | best_tb_obj, best_fb_obj = (0.0, 0.0) 85 | best_tb_trigger, best_fb_trigger = (0.0, 0.0) 86 | 87 | column_count = train_x.shape[1] 88 | for col in range(0, column_count): 89 | unique_vals = np.unique(train_x[:, col]) 90 | 91 | if self.max_values is not None: 92 | if self.max_values < 1: 93 | idx = np.round(np.linspace(0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 94 | unique_vals = unique_vals[idx] 95 | else: 96 | idx = np.round(np.linspace( 97 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 98 | unique_vals = unique_vals[idx] 99 | 100 | for value in unique_vals: 101 | 102 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 103 | = divide_set(val_x, val_y, val_t, col, value) 104 | 105 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 106 | = divide_set(train_x, train_y, train_t, col, value) 107 | 108 | tb_eval, tb_trigger, tb_mse = self._eval(train_y1, train_t1, val_y1, val_t1) 109 | fb_eval, fb_trigger, fb_mse = self._eval(train_y2, train_t2, val_y2, val_t2) 110 | 111 | split_eval = (tb_eval + fb_eval) 112 | gain = -node.obj + split_eval 113 | 114 | if gain > best_gain: 115 | best_gain = gain 116 | best_attributes = [col, value] 117 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 118 | best_tb_trigger, best_fb_trigger = (tb_trigger, fb_trigger) 119 | 120 | if best_gain > 0: 121 | node.col = best_attributes[0] 122 | node.value 
= best_attributes[1] 123 | 124 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 125 | = divide_set(train_x, train_y, train_t, node.col, node.value) 126 | 127 | (val_x1, val_x2, val_y1, val_y2, val_t1, val_t2) \ 128 | = divide_set(val_x, val_y, val_t, node.col, node.value) 129 | 130 | y1 = np.concatenate((train_y1, val_y1)) 131 | y2 = np.concatenate((train_y2, val_y2)) 132 | t1 = np.concatenate((train_t1, val_t1)) 133 | t2 = np.concatenate((train_t2, val_t2)) 134 | 135 | best_tb_effect = ace_trigger(y1, t1, best_tb_trigger) 136 | best_fb_effect = ace_trigger(y2, t2, best_fb_trigger) 137 | tb_p_val = get_pval_trigger(y1, t1, best_tb_trigger) 138 | fb_p_val = get_pval_trigger(y2, t2, best_fb_trigger) 139 | 140 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 141 | 142 | # ---------------------------------------------------------------- 143 | # Ignore "mse" here, come back to it later? 144 | # ---------------------------------------------------------------- 145 | 146 | tb = TriggerBaseNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 147 | node_depth=node.node_depth + 1, 148 | num_samples=y1.shape[0], trigger=best_tb_trigger) 149 | fb = TriggerBaseNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 150 | node_depth=node.node_depth + 1, 151 | num_samples=y2.shape[0], trigger=best_fb_trigger) 152 | 153 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, val_x1, val_y1, val_t1) 154 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, val_x2, val_y2, val_t2) 155 | 156 | if node.effect > self.max_effect: 157 | self.max_effect = node.effect 158 | if node.effect < self.min_effect: 159 | self.min_effect = node.effect 160 | 161 | return node 162 | 163 | else: 164 | if node.effect > self.max_effect: 165 | self.max_effect = node.effect 166 | if node.effect < self.min_effect: 167 | self.min_effect = node.effect 168 | 169 | self.num_leaves += 1 170 | node.leaf_num = self.num_leaves 171 | node.is_leaf = True 172 | return node 173 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/balance_split.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.nn_pehe.tree import * 2 | 3 | 4 | class BaseNode(PEHENode): 5 | 6 | def __init__(self, **kwargs): 7 | super().__init__(**kwargs) 8 | 9 | # self.obj = obj 10 | 11 | 12 | # ---------------------------------------------------------------- 13 | # Base causal tree (ctl, base objective) 14 | # ---------------------------------------------------------------- 15 | class BalanceBasePEHE(PEHETree): 16 | 17 | def __init__(self, eval2=False, **kwargs): 18 | super().__init__(**kwargs) 19 | self.root = BaseNode() 20 | self.eval2 = eval2 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # ---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | self.root.num_samples = y.shape[0] 32 | 
self.num_training = y.shape[0] 33 | 34 | # ---------------------------------------------------------------- 35 | # NN_effect estimates 36 | # use the overall datasets for nearest neighbor for now 37 | # ---------------------------------------------------------------- 38 | nn_effect = self.compute_nn_effect(x, y, t, k=self.k) 39 | 40 | # ---------------------------------------------------------------- 41 | # effect and pvals 42 | # ---------------------------------------------------------------- 43 | effect = tau_squared(y, t) 44 | p_val = get_pval(y, t) 45 | self.root.effect = effect 46 | self.root.p_val = p_val 47 | 48 | # ---------------------------------------------------------------- 49 | # Not sure if i should eval in root or not 50 | # ---------------------------------------------------------------- 51 | nn_pehe = self._eval(y, t, nn_effect) 52 | self.root.pehe = nn_pehe 53 | self.pehe = self.root.pehe 54 | 55 | # ---------------------------------------------------------------- 56 | # Add control/treatment means 57 | # ---------------------------------------------------------------- 58 | self.root.control_mean = np.mean(y[t == 0]) 59 | self.root.treatment_mean = np.mean(y[t == 1]) 60 | 61 | self.root.num_samples = x.shape[0] 62 | 63 | self._fit(self.root, x, y, t, nn_effect) 64 | 65 | if self.num_leaves > 0: 66 | self.pehe = self.pehe / self.num_leaves 67 | 68 | def _eval(self, train_y, train_t, nn_effect): 69 | 70 | # treated = np.where(train_t == 1)[0] 71 | # control = np.where(train_t == 0)[0] 72 | # pred_effect = np.mean(train_y[treated]) - np.mean(train_y[control]) 73 | pred_effect = ace(train_y, train_t) 74 | 75 | # nn_pehe = np.mean((nn_effect - pred_effect) ** 2) 76 | nn_pehe = np.sum((nn_effect - pred_effect) ** 2) 77 | 78 | return nn_pehe 79 | 80 | def _fit(self, node: BaseNode, train_x, train_y, train_t, nn_effect): 81 | 82 | if train_x.shape[0] == 0: 83 | return node 84 | 85 | if node.node_depth > self.tree_depth: 86 | self.tree_depth = node.node_depth 87 | 88 | if self.max_depth == self.tree_depth: 89 | self.num_leaves += 1 90 | node.leaf_num = self.num_leaves 91 | node.is_leaf = True 92 | return node 93 | 94 | # print(self.tree_depth, self.obj) 95 | 96 | best_gain = 0.0 97 | # best_gain = node.pehe # min amount 98 | best_attributes = [] 99 | best_tb_obj, best_fb_obj = (0.0, 0.0) 100 | 101 | column_count = train_x.shape[1] 102 | for col in range(0, column_count): 103 | unique_vals = np.unique(train_x[:, col]) 104 | 105 | for value in unique_vals: 106 | # check training data size 107 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 108 | = divide_set(train_x, train_y, train_t, col, value) 109 | check1 = check_min_size(self.min_size, train_t1) 110 | check2 = check_min_size(self.min_size, train_t2) 111 | if check1 or check2: 112 | continue 113 | (_, _, nn_effect1, nn_effect2, _, _) \ 114 | = divide_set(train_x, nn_effect, train_t, col, value) 115 | 116 | tb_eval = self._eval(train_y1, train_t1, nn_effect1) 117 | fb_eval = self._eval(train_y2, train_t2, nn_effect2) 118 | 119 | split_difference = np.abs(tb_eval - fb_eval) 120 | 121 | split_eval = (tb_eval + fb_eval) 122 | gain = node.pehe - split_eval - split_difference 123 | 124 | if gain > best_gain: 125 | best_gain = gain 126 | best_attributes = [col, value] 127 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 128 | # if self.eval2: 129 | # split_eval, value, tb_eval, fb_eval = self._eval2(unique_vals, train_x, train_y, train_t, nn_effect, 130 | # col, node.pehe) 131 | # 132 | # gain = node.pehe - split_eval 
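# ----------------------------------------------------------------
# Note on the gain used above: unlike BasePEHE, this balance variant
# subtracts the branch imbalance |tb_eval - fb_eval| on top of the
# combined child PEHE, so splits whose two branches carry very different
# PEHE mass are penalized. Sketch of the scoring rule:
#
#     split_eval = tb_eval + fb_eval              # combined child PEHE
#     balance_penalty = abs(tb_eval - fb_eval)    # branch imbalance
#     gain = parent_pehe - split_eval - balance_penalty
# ----------------------------------------------------------------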
133 | # 134 | # if gain > best_gain: 135 | # best_gain = gain 136 | # best_attributes = [col, value] 137 | # best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 138 | # else: 139 | # for value in unique_vals: 140 | # # check training data size 141 | # (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 142 | # = divide_set(train_x, train_y, train_t, col, value) 143 | # check1 = check_min_size(self.min_size, train_t1) 144 | # check2 = check_min_size(self.min_size, train_t2) 145 | # if check1 or check2: 146 | # continue 147 | # (_, _, nn_effect1, nn_effect2, _, _) \ 148 | # = divide_set(train_x, nn_effect, train_t, col, value) 149 | # 150 | # tb_eval = self._eval(train_y1, train_t1, nn_effect1) 151 | # fb_eval = self._eval(train_y2, train_t2, nn_effect2) 152 | # 153 | # split_eval = (tb_eval + fb_eval) 154 | # gain = node.pehe - split_eval 155 | # 156 | # if gain > best_gain: 157 | # best_gain = gain 158 | # best_attributes = [col, value] 159 | # best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 160 | 161 | if best_gain > 0: 162 | node.col = best_attributes[0] 163 | node.value = best_attributes[1] 164 | 165 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 166 | = divide_set(train_x, train_y, train_t, node.col, node.value) 167 | (_, _, nn_effect1, nn_effect2, _, _) \ 168 | = divide_set(train_x, nn_effect, train_t, node.col, node.value) 169 | 170 | y1 = train_y1 171 | y2 = train_y2 172 | t1 = train_t1 173 | t2 = train_t2 174 | 175 | best_tb_effect = ace(y1, t1) 176 | best_fb_effect = ace(y2, t2) 177 | tb_p_val = get_pval(y1, t1) 178 | fb_p_val = get_pval(y2, t2) 179 | 180 | self.pehe = self.pehe - node.pehe + best_tb_obj + best_fb_obj 181 | 182 | tb = BaseNode(obj=best_tb_obj, pehe=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 183 | node_depth=node.node_depth + 1, 184 | num_samples=y1.shape[0]) 185 | fb = BaseNode(obj=best_fb_obj, pehe=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 186 | node_depth=node.node_depth + 1, 187 | num_samples=y2.shape[0]) 188 | 189 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, nn_effect1) 190 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, nn_effect2) 191 | 192 | if node.effect > self.max_effect: 193 | self.max_effect = node.effect 194 | if node.effect < self.min_effect: 195 | self.min_effect = node.effect 196 | 197 | return node 198 | 199 | else: 200 | if node.effect > self.max_effect: 201 | self.max_effect = node.effect 202 | if node.effect < self.min_effect: 203 | self.min_effect = node.effect 204 | 205 | self.num_leaves += 1 206 | node.leaf_num = self.num_leaves 207 | node.is_leaf = True 208 | return node 209 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/honest.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.nn_pehe.tree import * 2 | from sklearn.model_selection import train_test_split 3 | 4 | 5 | class HonestNode(PEHENode): 6 | 7 | def __init__(self, **kwargs): 8 | super().__init__(**kwargs) 9 | 10 | # self.obj = obj 11 | 12 | 13 | # ---------------------------------------------------------------- 14 | # Base causal tree (ctl, base objective) 15 | # ---------------------------------------------------------------- 16 | class HonestPEHE(PEHETree): 17 | 18 | def __init__(self, **kwargs): 19 | super().__init__(**kwargs) 20 | self.root = HonestNode() 21 | 22 | def fit(self, x, y, t): 23 | if x.shape[0] == 0: 24 | return 0 25 | 26 | # 
---------------------------------------------------------------- 27 | # Seed 28 | # ---------------------------------------------------------------- 29 | np.random.seed(self.seed) 30 | 31 | # ---------------------------------------------------------------- 32 | # Split data 33 | # ---------------------------------------------------------------- 34 | x, est_x, y, est_y, t, est_t = train_test_split(x, y, t, random_state=self.seed, shuffle=True, 35 | test_size=0.5) 36 | self.root.num_samples = est_y.shape[0] 37 | self.num_training = y.shape[0] 38 | 39 | # ---------------------------------------------------------------- 40 | # NN_effect estimates 41 | # use the overall datasets for nearest neighbor for now 42 | # ---------------------------------------------------------------- 43 | nn_effect = compute_nn_effect(x, y, t, k=self.k) 44 | # val_nn_effect = compute_nn_effect(est_x, est_y, est_t, k=self.k) 45 | 46 | # ---------------------------------------------------------------- 47 | # effect and pvals 48 | # ---------------------------------------------------------------- 49 | effect = tau_squared(y, t) 50 | p_val = get_pval(y, t) 51 | self.root.effect = effect 52 | self.root.p_val = p_val 53 | 54 | # ---------------------------------------------------------------- 55 | # Not sure if i should eval in root or not 56 | # ---------------------------------------------------------------- 57 | nn_pehe = self._eval(y, t, nn_effect) 58 | self.root.obj = nn_pehe 59 | self.obj = self.root.obj 60 | 61 | # ---------------------------------------------------------------- 62 | # Add control/treatment means 63 | # ---------------------------------------------------------------- 64 | self.root.control_mean = np.mean(y[t == 0]) 65 | self.root.treatment_mean = np.mean(y[t == 1]) 66 | 67 | self.root.num_samples = x.shape[0] 68 | 69 | self._fit(self.root, x, y, t, nn_effect, est_x, est_y, est_t) 70 | 71 | if self.num_leaves > 0: 72 | self.obj = self.obj / self.num_leaves 73 | 74 | def _eval(self, train_y, train_t, nn_effect): 75 | 76 | # total_train = train_y.shape[0] 77 | 78 | # treated = np.where(train_t == 1)[0] 79 | # control = np.where(train_t == 0)[0] 80 | # pred_effect = np.mean(train_y[treated]) - np.mean(train_y[control]) 81 | pred_effect = ace(train_y, train_t) 82 | 83 | # nn_pehe = np.mean((nn_effect - pred_effect) ** 2) 84 | nn_pehe = np.sum((nn_effect - pred_effect) ** 2) 85 | 86 | # val_effect = ace(val_y, val_t) 87 | # val_nn_pehe = np.sum((val_nn_effect - pred_effect) ** 2) 88 | # val_train_ratio = total_train / total_val 89 | # val_nn_pehe = val_nn_pehe * val_train_ratio 90 | # pehe_diff = np.abs(nn_pehe - val_nn_pehe) 91 | 92 | # cost = np.abs(total_train * pred_effect - total_train * val_effect) 93 | 94 | var_t, var_c = variance(train_y, train_t) 95 | 96 | return nn_pehe 97 | 98 | def _fit(self, node: HonestNode, train_x, train_y, train_t, nn_effect, est_x, est_y, est_t): 99 | 100 | if train_x.shape[0] == 0: 101 | return node 102 | 103 | if node.node_depth > self.tree_depth: 104 | self.tree_depth = node.node_depth 105 | 106 | if self.max_depth == self.tree_depth: 107 | if node.effect > self.max_effect: 108 | self.max_effect = node.effect 109 | if node.effect < self.min_effect: 110 | self.min_effect = node.effect 111 | self.num_leaves += 1 112 | node.leaf_num = self.num_leaves 113 | node.is_leaf = True 114 | return node 115 | 116 | # print(self.tree_depth, self.obj) 117 | 118 | best_gain = 0.0 119 | best_attributes = [] 120 | best_tb_obj, best_fb_obj = (0.0, 0.0) 121 | 122 | column_count = 
train_x.shape[1] 123 | for col in range(0, column_count): 124 | unique_vals = np.unique(train_x[:, col]) 125 | 126 | for value in unique_vals: 127 | (est_x1, est_x2, est_y1, est_y2, est_t1, est_t2) \ 128 | = divide_set(est_x, est_y, est_t, col, value) 129 | 130 | # check est set size 131 | if check_min_size(self.min_size, est_t1) or check_min_size(self.min_size, est_t2): 132 | continue 133 | 134 | # check training data size 135 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 136 | = divide_set(train_x, train_y, train_t, col, value) 137 | check1 = check_min_size(self.min_size, train_t1) 138 | check2 = check_min_size(self.min_size, train_t2) 139 | if check1 or check2: 140 | continue 141 | (_, _, nn_effect1, nn_effect2, _, _) \ 142 | = divide_set(train_x, nn_effect, train_t, col, value) 143 | 144 | tb_eval = self._eval(train_y1, train_t1, nn_effect1) 145 | fb_eval = self._eval(train_y2, train_t2, nn_effect2) 146 | 147 | split_eval = (tb_eval + fb_eval) 148 | gain = node.obj - split_eval 149 | 150 | if gain > best_gain: 151 | best_gain = gain 152 | best_attributes = [col, value] 153 | best_tb_obj, best_fb_obj = (tb_eval, fb_eval) 154 | 155 | # print(tb_eval, fb_eval, gain, best_gain) 156 | 157 | if best_gain > 0: 158 | node.col = best_attributes[0] 159 | node.value = best_attributes[1] 160 | 161 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 162 | = divide_set(train_x, train_y, train_t, node.col, node.value) 163 | (est_x1, est_x2, est_y1, est_y2, est_t1, est_t2) \ 164 | = divide_set(est_x, est_y, est_t, node.col, node.value) 165 | (_, _, nn_effect1, nn_effect2, _, _) \ 166 | = divide_set(train_x, nn_effect, train_t, node.col, node.value) 167 | 168 | # y1 = train_y1 169 | # y2 = train_y2 170 | # t1 = train_t1 171 | # t2 = train_t2 172 | # y1 = np.concatenate((train_y1, val_y1)) 173 | # y2 = np.concatenate((train_y2, val_y2)) 174 | # t1 = np.concatenate((train_t1, val_t1)) 175 | # t2 = np.concatenate((train_t2, val_t2)) 176 | y1 = est_y1 177 | y2 = est_y2 178 | t1 = est_t1 179 | t2 = est_t2 180 | 181 | best_tb_effect = ace(y1, t1) 182 | best_fb_effect = ace(y2, t2) 183 | tb_p_val = get_pval(y1, t1) 184 | fb_p_val = get_pval(y2, t2) 185 | 186 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 187 | 188 | tb = HonestNode(obj=best_tb_obj, pehe=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 189 | node_depth=node.node_depth + 1, 190 | num_samples=train_y1.shape[0]) 191 | fb = HonestNode(obj=best_fb_obj, pehe=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 192 | node_depth=node.node_depth + 1, 193 | num_samples=train_y2.shape[0]) 194 | 195 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1, nn_effect1, est_x1, est_y1, est_t1) 196 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2, nn_effect2, est_x2, est_y2, est_t2) 197 | 198 | if node.effect > self.max_effect: 199 | self.max_effect = node.effect 200 | if node.effect < self.min_effect: 201 | self.min_effect = node.effect 202 | 203 | return node 204 | 205 | else: 206 | if node.effect > self.max_effect: 207 | self.max_effect = node.effect 208 | if node.effect < self.min_effect: 209 | self.min_effect = node.effect 210 | 211 | self.num_leaves += 1 212 | node.leaf_num = self.num_leaves 213 | node.is_leaf = True 214 | return node 215 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/nn_pehe/tree.py: -------------------------------------------------------------------------------- 1 | try: 2 
| from CTL.causal_tree.util_c import * 3 | except: 4 | from CTL.causal_tree.util import * 5 | from CTL.causal_tree.ct import * 6 | import numpy as np 7 | from scipy.spatial import cKDTree 8 | 9 | 10 | # TODO: Add weighting on evaluations 11 | # TODO: add weighting on k > 1 nearest neighbors? 12 | 13 | def compute_nn_effect(x, y, t, k=1): 14 | kdtree = cKDTree(x) 15 | d, idx = kdtree.query(x, k=x.shape[0]) 16 | idx = idx[:, 1:] 17 | treated = np.where(t == 1)[0] 18 | control = np.where(t == 0)[0] 19 | bool_treated = np.isin(idx, treated) 20 | bool_control = np.isin(idx, control) 21 | 22 | nn_effect = np.zeros(x.shape[0]) 23 | for i in range(len(bool_treated)): 24 | i_treat_idx = np.where(bool_treated[i, :])[0][:k] 25 | i_control_idx = np.where(bool_control[i, :])[0][:k] 26 | 27 | i_treat_nn = y[idx[i, i_treat_idx]] 28 | i_cont_nn = y[idx[i, i_control_idx]] 29 | 30 | nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn) 31 | 32 | return nn_effect 33 | 34 | 35 | class PEHENode(CTNode): 36 | 37 | def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1, 38 | is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0, pehe=0.0): 39 | super().__init__() 40 | # not tree specific features (most likely added at creation) 41 | self.p_val = p_val 42 | self.effect = effect 43 | self.node_depth = node_depth 44 | self.control_mean = control_mean 45 | self.treatment_mean = treatment_mean 46 | 47 | # during tree building 48 | self.obj = obj 49 | self.num_samples = num_samples 50 | self.pehe = pehe 51 | 52 | # after building tree 53 | self.col = col 54 | self.value = value 55 | self.is_leaf = is_leaf 56 | self.leaf_num = leaf_num 57 | self.true_branch = None 58 | self.false_branch = None 59 | 60 | # after calling functions 61 | self.column_name = "" 62 | self.decision = "" 63 | 64 | 65 | class PEHETree(CausalTree): 66 | 67 | def __init__(self, split_size=0.5, max_depth=-1, min_size=2, max_values=None, verbose=False, 68 | k=1, use_propensity=False, propensity_model=None, 69 | seed=724): 70 | super().__init__() 71 | self.val_split = split_size 72 | self.max_depth = max_depth 73 | self.min_size = min_size 74 | self.seed = seed 75 | 76 | self.max_values = max_values 77 | self.verbose = verbose 78 | 79 | self.max_effect = 0.0 80 | self.min_effect = 0.0 81 | 82 | self.features = None 83 | 84 | self.k = k 85 | self.num_training = 1 86 | self.pehe = 0 87 | self.use_propensity = use_propensity 88 | if use_propensity: 89 | if propensity_model is not None: 90 | self.proensity_model = propensity_model 91 | else: 92 | from sklearn.linear_model import LogisticRegression 93 | self.proensity_model = LogisticRegression() 94 | 95 | self.root = PEHENode() 96 | 97 | def compute_nn_effect(self, x, y, t, k=1): 98 | if self.use_propensity: 99 | self.proensity_model.fit(x, t) 100 | propensity = self.proensity_model.predict_proba(x)[:, 1:] 101 | kdtree = cKDTree(propensity) 102 | _, idx = kdtree.query(propensity, k=x.shape[0]) 103 | else: 104 | kdtree = cKDTree(x) 105 | _, idx = kdtree.query(x, k=x.shape[0]) 106 | idx = idx[:, 1:] 107 | treated = np.where(t == 1)[0] 108 | control = np.where(t == 0)[0] 109 | bool_treated = np.isin(idx, treated) 110 | bool_control = np.isin(idx, control) 111 | 112 | nn_effect = np.zeros(x.shape) 113 | for i in range(len(bool_treated)): 114 | i_treat_idx = np.where(bool_treated[i, :])[0][:k] 115 | i_control_idx = np.where(bool_control[i, :])[0][:k] 116 | 117 | i_treat_nn = y[idx[i, i_treat_idx]] 118 | i_cont_nn = y[idx[i, i_control_idx]] 119 | 120 | 
nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn) 121 | 122 | return nn_effect 123 | 124 | @abstractmethod 125 | def fit(self, x, y, t): 126 | pass 127 | 128 | def predict(self, x): 129 | 130 | def _predict(node: PEHENode, observation): 131 | if node.is_leaf: 132 | return node.effect 133 | else: 134 | v = observation[node.col] 135 | if v >= node.value: 136 | branch = node.true_branch 137 | else: 138 | branch = node.false_branch 139 | 140 | return _predict(branch, observation) 141 | 142 | if len(x.shape) == 1: 143 | prediction = _predict(self.root, x) 144 | return prediction 145 | 146 | num_test = x.shape[0] 147 | 148 | prediction = np.zeros(num_test) 149 | 150 | for i in range(num_test): 151 | test_example = x[i, :] 152 | prediction[i] = _predict(self.root, test_example) 153 | 154 | return prediction 155 | 156 | def get_groups(self, x): 157 | 158 | def _get_group(node: PEHENode, observation): 159 | if node.is_leaf: 160 | return node.leaf_num 161 | else: 162 | v = observation[node.col] 163 | if v >= node.value: 164 | branch = node.true_branch 165 | else: 166 | branch = node.false_branch 167 | 168 | return _get_group(branch, observation) 169 | 170 | if len(x.shape) == 1: 171 | return _get_group(self.root, x) 172 | num_test = x.shape[0] 173 | leaf_results = np.zeros(num_test) 174 | 175 | for i in range(num_test): 176 | test_example = x[i, :] 177 | leaf_results[i] = _get_group(self.root, test_example) 178 | 179 | return leaf_results 180 | 181 | def get_features(self, x): 182 | 183 | def _get_features(node: PEHENode, observation, features): 184 | if node.is_leaf: 185 | return features 186 | else: 187 | v = observation[node.col] 188 | if v >= node.value: 189 | branch = node.true_branch 190 | else: 191 | branch = node.false_branch 192 | 193 | features.append(node.decision) 194 | return _get_features(branch, observation, features) 195 | 196 | if len(x.shape) == 1: 197 | features = [] 198 | return _get_features(self.root, x, features) 199 | num_test = x.shape[0] 200 | leaf_features = [] 201 | 202 | for i in range(num_test): 203 | features = [] 204 | test_example = x[i, :] 205 | leaf_features.append(_get_features(self.root, test_example, features)) 206 | 207 | return leaf_features 208 | 209 | def prune(self, alpha=0.05): 210 | 211 | def _prune(node: PEHENode): 212 | if node.true_branch is None or node.false_branch is None: 213 | return 214 | 215 | # recursive call for each branch 216 | if not node.true_branch.is_leaf: 217 | _prune(node.true_branch) 218 | if not node.false_branch.is_leaf: 219 | _prune(node.false_branch) 220 | 221 | # merge leaves (potentially) 222 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 223 | # Get branches 224 | tb = node.true_branch 225 | fb = node.false_branch 226 | 227 | tb_pval = tb.p_val 228 | fb_pval = fb.p_val 229 | 230 | if tb_pval > alpha and fb_pval > alpha: 231 | node.leaf_num = node.true_branch.leaf_num 232 | node.true_branch = None 233 | node.false_branch = None 234 | self.num_leaves = self.num_leaves - 1 235 | node.is_leaf = True 236 | 237 | # ---------------------------------------------------------------- 238 | # Something about obj/mse? if that is added 239 | # 240 | # - can do a self function so that tree references itself/it's own type of node? 
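# ----------------------------------------------------------------
# Pruning rule used here: a pair of sibling leaves is merged back into
# their parent only when neither leaf's treatment-effect t-test is
# significant, i.e. both p-values exceed alpha. For example, with
# alpha = 0.05, sibling leaves with p = 0.30 and p = 0.12 collapse into
# the parent, while p = 0.30 and p = 0.01 stay split.
# ----------------------------------------------------------------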
241 | # ---------------------------------------------------------------- 242 | if tb.node_depth == self.tree_depth: 243 | self.tree_depth = self.tree_depth - 1 244 | 245 | _prune(self.root) 246 | 247 | def get_triggers(self, x): 248 | pass 249 | 250 | def save(self, filename): 251 | import pickle as pkl 252 | 253 | check_dir(filename) 254 | with open(filename, "wb") as file: 255 | pkl.dump(self, file) 256 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/r_tree/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/r_tree/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/r_tree/tree.py: -------------------------------------------------------------------------------- 1 | try: 2 | from CTL.causal_tree.util_c import * 3 | except: 4 | from CTL.causal_tree.util import * 5 | from CTL.causal_tree.ct import * 6 | import numpy as np 7 | from scipy.spatial import cKDTree 8 | 9 | 10 | # TODO: Add weighting on evaluations 11 | # TODO: add weighting on k > 1 nearest neighbors? 12 | 13 | def compute_nn_effect(x, y, t, k=1): 14 | kdtree = cKDTree(x) 15 | d, idx = kdtree.query(x, k=x.shape[0]) 16 | idx = idx[:, 1:] 17 | treated = np.where(t == 1)[0] 18 | control = np.where(t == 0)[0] 19 | bool_treated = np.isin(idx, treated) 20 | bool_control = np.isin(idx, control) 21 | 22 | nn_effect = np.zeros(x.shape) 23 | for i in range(len(bool_treated)): 24 | i_treat_idx = np.where(bool_treated[i, :])[0][:k] 25 | i_control_idx = np.where(bool_control[i, :])[0][:k] 26 | 27 | i_treat_nn = y[idx[i, i_treat_idx]] 28 | i_cont_nn = y[idx[i, i_control_idx]] 29 | 30 | nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn) 31 | 32 | return nn_effect 33 | 34 | 35 | class RNode(CTNode): 36 | 37 | def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1, 38 | is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0, pehe=0.0): 39 | super().__init__() 40 | # not tree specific features (most likely added at creation) 41 | self.p_val = p_val 42 | self.effect = effect 43 | self.node_depth = node_depth 44 | self.control_mean = control_mean 45 | self.treatment_mean = treatment_mean 46 | 47 | # during tree building 48 | self.obj = obj 49 | self.num_samples = num_samples 50 | self.pehe = pehe 51 | 52 | # after building tree 53 | self.col = col 54 | self.value = value 55 | self.is_leaf = is_leaf 56 | self.leaf_num = leaf_num 57 | self.true_branch = None 58 | self.false_branch = None 59 | 60 | # after calling functions 61 | self.column_name = "" 62 | self.decision = "" 63 | 64 | 65 | class RTree(CausalTree): 66 | 67 | def __init__(self, split_size=0.5, max_depth=-1, min_size=2, max_values=None, verbose=False, 68 | k=1, use_propensity=False, propensity_model=None, 69 | seed=724): 70 | super().__init__() 71 | self.val_split = split_size 72 | self.max_depth = max_depth 73 | self.min_size = min_size 74 | self.seed = seed 75 | 76 | self.max_values = max_values 77 | self.verbose = verbose 78 | 79 | self.max_effect = 0.0 80 | self.min_effect = 0.0 81 | 82 | self.features = None 83 | 84 | self.k = k 85 | self.num_training = 1 86 | self.pehe = 0 87 | self.use_propensity = use_propensity 88 | if use_propensity: 89 | if 
propensity_model is not None: 90 | self.proensity_model = propensity_model 91 | else: 92 | from sklearn.linear_model import LogisticRegression 93 | self.proensity_model = LogisticRegression() 94 | 95 | self.root = RNode() 96 | 97 | def compute_nn_effect(self, x, y, t, k=1): 98 | if self.use_propensity: 99 | self.proensity_model.fit(x, t) 100 | propensity = self.proensity_model.predict_proba(x)[:, 1:] 101 | kdtree = cKDTree(propensity) 102 | _, idx = kdtree.query(propensity, k=x.shape[0]) 103 | else: 104 | kdtree = cKDTree(x) 105 | _, idx = kdtree.query(x, k=x.shape[0]) 106 | idx = idx[:, 1:] 107 | treated = np.where(t == 1)[0] 108 | control = np.where(t == 0)[0] 109 | bool_treated = np.isin(idx, treated) 110 | bool_control = np.isin(idx, control) 111 | 112 | nn_effect = np.zeros(x.shape) 113 | for i in range(len(bool_treated)): 114 | i_treat_idx = np.where(bool_treated[i, :])[0][:k] 115 | i_control_idx = np.where(bool_control[i, :])[0][:k] 116 | 117 | i_treat_nn = y[idx[i, i_treat_idx]] 118 | i_cont_nn = y[idx[i, i_control_idx]] 119 | 120 | nn_effect[i] = np.mean(i_treat_nn) - np.mean(i_cont_nn) 121 | 122 | return nn_effect 123 | 124 | @abstractmethod 125 | def fit(self, x, y, t): 126 | pass 127 | 128 | def predict(self, x): 129 | 130 | def _predict(node: PEHENode, observation): 131 | if node.is_leaf: 132 | return node.effect 133 | else: 134 | v = observation[node.col] 135 | if v >= node.value: 136 | branch = node.true_branch 137 | else: 138 | branch = node.false_branch 139 | 140 | return _predict(branch, observation) 141 | 142 | if len(x.shape) == 1: 143 | prediction = _predict(self.root, x) 144 | return prediction 145 | 146 | num_test = x.shape[0] 147 | 148 | prediction = np.zeros(num_test) 149 | 150 | for i in range(num_test): 151 | test_example = x[i, :] 152 | prediction[i] = _predict(self.root, test_example) 153 | 154 | return prediction 155 | 156 | def get_groups(self, x): 157 | 158 | def _get_group(node: PEHENode, observation): 159 | if node.is_leaf: 160 | return node.leaf_num 161 | else: 162 | v = observation[node.col] 163 | if v >= node.value: 164 | branch = node.true_branch 165 | else: 166 | branch = node.false_branch 167 | 168 | return _get_group(branch, observation) 169 | 170 | if len(x.shape) == 1: 171 | return _get_group(self.root, x) 172 | num_test = x.shape[0] 173 | leaf_results = np.zeros(num_test) 174 | 175 | for i in range(num_test): 176 | test_example = x[i, :] 177 | leaf_results[i] = _get_group(self.root, test_example) 178 | 179 | return leaf_results 180 | 181 | def get_features(self, x): 182 | 183 | def _get_features(node: PEHENode, observation, features): 184 | if node.is_leaf: 185 | return features 186 | else: 187 | v = observation[node.col] 188 | if v >= node.value: 189 | branch = node.true_branch 190 | else: 191 | branch = node.false_branch 192 | 193 | features.append(node.decision) 194 | return _get_features(branch, observation, features) 195 | 196 | if len(x.shape) == 1: 197 | features = [] 198 | return _get_features(self.root, x, features) 199 | num_test = x.shape[0] 200 | leaf_features = [] 201 | 202 | for i in range(num_test): 203 | features = [] 204 | test_example = x[i, :] 205 | leaf_features.append(_get_features(self.root, test_example, features)) 206 | 207 | return leaf_features 208 | 209 | def prune(self, alpha=0.05): 210 | 211 | def _prune(node: PEHENode): 212 | if node.true_branch is None or node.false_branch is None: 213 | return 214 | 215 | # recursive call for each branch 216 | if not node.true_branch.is_leaf: 217 | _prune(node.true_branch) 218 | if 
not node.false_branch.is_leaf: 219 | _prune(node.false_branch) 220 | 221 | # merge leaves (potentially) 222 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 223 | # Get branches 224 | tb = node.true_branch 225 | fb = node.false_branch 226 | 227 | tb_pval = tb.p_val 228 | fb_pval = fb.p_val 229 | 230 | if tb_pval > alpha and fb_pval > alpha: 231 | node.leaf_num = node.true_branch.leaf_num 232 | node.true_branch = None 233 | node.false_branch = None 234 | self.num_leaves = self.num_leaves - 1 235 | node.is_leaf = True 236 | 237 | # ---------------------------------------------------------------- 238 | # Something about obj/mse? if that is added 239 | # 240 | # - can do a self function so that tree references itself/it's own type of node? 241 | # ---------------------------------------------------------------- 242 | if tb.node_depth == self.tree_depth: 243 | self.tree_depth = self.tree_depth - 1 244 | 245 | _prune(self.root) 246 | 247 | def get_triggers(self, x): 248 | pass 249 | 250 | def save(self, filename): 251 | import pickle as pkl 252 | 253 | check_dir(filename) 254 | with open(filename, "wb") as file: 255 | pkl.dump(self, file) 256 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/sig_diff/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/sig_diff/__init__.py -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/sig_diff/sig.py: -------------------------------------------------------------------------------- 1 | # from CTL.causal_tree.util import * 2 | try: 3 | from CTL.causal_tree.util_c import * 4 | except: 5 | from CTL.causal_tree.util import * 6 | from CTL.causal_tree.ct import * 7 | import numpy as np 8 | from scipy.stats import ttest_ind_from_stats 9 | 10 | 11 | class SigNode(CTNode): 12 | 13 | def __init__(self, p_val=1.0, effect=0.0, node_depth=0, control_mean=0.0, treatment_mean=0.0, col=-1, value=-1, 14 | is_leaf=False, leaf_num=-1, num_samples=0.0, obj=0.0): 15 | super().__init__() 16 | # not tree specific features (most likely added at creation) 17 | self.p_val = p_val 18 | self.effect = effect 19 | self.node_depth = node_depth 20 | self.control_mean = control_mean 21 | self.treatment_mean = treatment_mean 22 | 23 | # during tree building 24 | self.obj = obj 25 | self.num_samples = num_samples 26 | 27 | # after building tree 28 | self.col = col 29 | self.value = value 30 | self.is_leaf = is_leaf 31 | self.leaf_num = leaf_num 32 | self.true_branch = None 33 | self.false_branch = None 34 | 35 | # after calling functions 36 | self.column_name = "" 37 | self.decision = "" 38 | 39 | 40 | class SigTree(CausalTree): 41 | 42 | def __init__(self, alpha=0.05, max_depth=-1, min_size=2, seed=724, max_values=None, verbose=False): 43 | super().__init__() 44 | self.alpha = 0.05 45 | self.max_depth = max_depth 46 | self.min_size = min_size 47 | self.seed = seed 48 | 49 | self.max_values = max_values 50 | self.verbose = verbose 51 | 52 | self.max_effect = 0.0 53 | self.min_effect = 0.0 54 | 55 | self.features = None 56 | 57 | self.root = SigNode() 58 | 59 | @abstractmethod 60 | def fit(self, x, y, t): 61 | pass 62 | 63 | def _eval_util(self, train_y, train_t): 64 | var_t, var_c = variance(train_y, train_t) 65 | std = 
np.sqrt(var_t) + np.sqrt(var_c) 66 | effect = ace(train_y, train_t) 67 | 68 | return effect, std 69 | 70 | def _eval(self, y_train1, t_train1, y_train2, t_train2): 71 | 72 | total1 = y_train1.shape[0] 73 | total2 = y_train2.shape[0] 74 | 75 | return_val = (1, 1) 76 | if total1 < 1 or total2 < 1: 77 | return return_val 78 | 79 | effect1, std1 = self._eval_util(y_train1, t_train1) 80 | effect2, std2 = self._eval_util(y_train2, t_train2) 81 | 82 | stat, p_val = ttest_ind_from_stats(effect1, std1, total1, effect2, std2, total2) 83 | return stat, p_val 84 | 85 | def predict(self, x): 86 | 87 | def _predict(node: SigNode, observation): 88 | if node.is_leaf: 89 | return node.effect 90 | else: 91 | v = observation[node.col] 92 | if v >= node.value: 93 | branch = node.true_branch 94 | else: 95 | branch = node.false_branch 96 | 97 | return _predict(branch, observation) 98 | 99 | if len(x.shape) == 1: 100 | prediction = _predict(self.root, x) 101 | return prediction 102 | 103 | num_test = x.shape[0] 104 | 105 | prediction = np.zeros(num_test) 106 | 107 | for i in range(num_test): 108 | test_example = x[i, :] 109 | prediction[i] = _predict(self.root, test_example) 110 | 111 | return prediction 112 | 113 | def get_groups(self, x): 114 | 115 | def _get_group(node: SigNode, observation): 116 | if node.is_leaf: 117 | return node.leaf_num 118 | else: 119 | v = observation[node.col] 120 | if v >= node.value: 121 | branch = node.true_branch 122 | else: 123 | branch = node.false_branch 124 | 125 | return _get_group(branch, observation) 126 | 127 | if len(x.shape) == 1: 128 | return _get_group(self.root, x) 129 | num_test = x.shape[0] 130 | leaf_results = np.zeros(num_test) 131 | 132 | for i in range(num_test): 133 | test_example = x[i, :] 134 | leaf_results[i] = _get_group(self.root, test_example) 135 | 136 | return leaf_results 137 | 138 | def get_features(self, x): 139 | 140 | def _get_features(node: SigNode, observation, features): 141 | if node.is_leaf: 142 | return features 143 | else: 144 | v = observation[node.col] 145 | if v >= node.value: 146 | branch = node.true_branch 147 | else: 148 | branch = node.false_branch 149 | 150 | features.append(node.decision) 151 | return _get_features(branch, observation, features) 152 | 153 | if len(x.shape) == 1: 154 | features = [] 155 | return _get_features(self.root, x, features) 156 | num_test = x.shape[0] 157 | leaf_features = [] 158 | 159 | for i in range(num_test): 160 | features = [] 161 | test_example = x[i, :] 162 | leaf_features.append(_get_features(self.root, test_example, features)) 163 | 164 | return leaf_features 165 | 166 | def prune(self, alpha=0.05): 167 | 168 | def _prune(node: SigNode): 169 | if node.true_branch is None or node.false_branch is None: 170 | return 171 | 172 | # recursive call for each branch 173 | if not node.true_branch.is_leaf: 174 | _prune(node.true_branch) 175 | if not node.false_branch.is_leaf: 176 | _prune(node.false_branch) 177 | 178 | # merge leaves (potentially) 179 | if node.true_branch.is_leaf and node.false_branch.is_leaf: 180 | # Get branches 181 | tb = node.true_branch 182 | fb = node.false_branch 183 | 184 | tb_pval = tb.p_val 185 | fb_pval = fb.p_val 186 | 187 | if tb_pval > alpha and fb_pval > alpha: 188 | node.leaf_num = node.true_branch.leaf_num 189 | node.true_branch = None 190 | node.false_branch = None 191 | self.num_leaves = self.num_leaves - 1 192 | node.is_leaf = True 193 | 194 | # ---------------------------------------------------------------- 195 | # Something about obj/mse? 
if that is added 196 | # 197 | # - can do a self function so that tree references itself/it's own type of node? 198 | # ---------------------------------------------------------------- 199 | if tb.node_depth == self.tree_depth: 200 | self.tree_depth = self.tree_depth - 1 201 | 202 | _prune(self.root) 203 | 204 | def get_triggers(self, x): 205 | pass 206 | 207 | def save(self, filename): 208 | import pickle as pkl 209 | 210 | check_dir(filename) 211 | with open(filename, "wb") as file: 212 | pkl.dump(self, file) -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/sig_diff/sig_base.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree.sig_diff.sig import * 2 | 3 | 4 | class BaseCausalTreeLearnNode(SigNode): 5 | 6 | def __init__(self, **kwargs): 7 | super().__init__(**kwargs) 8 | 9 | 10 | class SigTreeBase(SigTree): 11 | 12 | def __init__(self, **kwargs): 13 | super().__init__(**kwargs) 14 | self.root = BaseCausalTreeLearnNode() 15 | 16 | def fit(self, x, y, t): 17 | if x.shape[0] == 0: 18 | return 0 19 | 20 | # ---------------------------------------------------------------- 21 | # Seed 22 | # ---------------------------------------------------------------- 23 | np.random.seed(self.seed) 24 | 25 | train_x, train_y, train_t = x, y, t 26 | self.root.num_samples = train_y.shape[0] 27 | # ---------------------------------------------------------------- 28 | # effect and pvals 29 | # ---------------------------------------------------------------- 30 | effect = tau_squared(y, t) 31 | p_val = get_pval(y, t) 32 | self.root.effect = effect 33 | self.root.p_val = p_val 34 | 35 | self.root.obj = 0 36 | 37 | # ---------------------------------------------------------------- 38 | # Add control/treatment means 39 | # ---------------------------------------------------------------- 40 | self.root.control_mean = np.mean(y[t == 0]) 41 | self.root.treatment_mean = np.mean(y[t == 1]) 42 | 43 | self.root.num_samples = x.shape[0] 44 | 45 | self._fit(self.root, train_x, train_y, train_t) 46 | 47 | def _fit(self, node: BaseCausalTreeLearnNode, train_x, train_y, train_t): 48 | 49 | if train_x.shape[0] == 0: 50 | return node 51 | 52 | if node.node_depth > self.tree_depth: 53 | self.tree_depth = node.node_depth 54 | 55 | if self.max_depth == self.tree_depth: 56 | if node.effect > self.max_effect: 57 | self.max_effect = node.effect 58 | if node.effect < self.min_effect: 59 | self.min_effect = node.effect 60 | self.num_leaves += 1 61 | node.leaf_num = self.num_leaves 62 | node.is_leaf = True 63 | return node 64 | 65 | best_gain = 1.0 66 | best_attributes = [] 67 | best_tb_obj, best_fb_obj = (0.0, 0.0) 68 | 69 | column_count = train_x.shape[1] 70 | for col in range(0, column_count): 71 | unique_vals = np.unique(train_x[:, col]) 72 | 73 | if self.max_values is not None: 74 | if self.max_values < 1: 75 | idx = np.round(np.linspace( 76 | 0, len(unique_vals) - 1, self.max_values * len(unique_vals))).astype(int) 77 | unique_vals = unique_vals[idx] 78 | else: 79 | idx = np.round(np.linspace( 80 | 0, len(unique_vals) - 1, self.max_values)).astype(int) 81 | unique_vals = unique_vals[idx] 82 | 83 | for value in unique_vals: 84 | 85 | # check training data size 86 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 87 | = divide_set(train_x, train_y, train_t, col, value) 88 | check1 = check_min_size(self.min_size, train_t1) 89 | check2 = check_min_size(self.min_size, train_t2) 
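# ----------------------------------------------------------------
# Split criterion in this tree: instead of maximizing a CTL objective,
# each candidate split is scored by the p-value of a two-sample t-test
# comparing the two branches' effect estimates (SigTree._eval builds it
# from each branch's effect and spread):
#
#     from scipy.stats import ttest_ind_from_stats
#     stat, p = ttest_ind_from_stats(effect1, std1, n1, effect2, std2, n2)
#
# The split with the smallest p-value is kept only if it clears
# self.alpha. Review note: SigTree.__init__ in sig.py hard-codes
# self.alpha = 0.05 instead of self.alpha = alpha, so the alpha argument
# passed to the constructor is currently ignored.
# ----------------------------------------------------------------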
90 | if check1 or check2: 91 | continue 92 | 93 | t_stat, diff_pval = self._eval(train_y1, train_t1, train_y2, train_t2) 94 | 95 | gain = diff_pval 96 | 97 | if gain < best_gain and gain <= self.alpha: 98 | best_gain = gain 99 | best_attributes = [col, value] 100 | 101 | if best_gain <= self.alpha: 102 | node.col = best_attributes[0] 103 | node.value = best_attributes[1] 104 | 105 | (train_x1, train_x2, train_y1, train_y2, train_t1, train_t2) \ 106 | = divide_set(train_x, train_y, train_t, node.col, node.value) 107 | 108 | y1 = train_y1 109 | y2 = train_y2 110 | t1 = train_t1 111 | t2 = train_t2 112 | 113 | best_tb_effect = ace(y1, t1) 114 | best_fb_effect = ace(y2, t2) 115 | tb_p_val = get_pval(y1, t1) 116 | fb_p_val = get_pval(y2, t2) 117 | 118 | self.obj = self.obj - node.obj + best_tb_obj + best_fb_obj 119 | 120 | tb = BaseCausalTreeLearnNode(obj=best_tb_obj, effect=best_tb_effect, p_val=tb_p_val, 121 | node_depth=node.node_depth + 1, 122 | num_samples=y1.shape[0]) 123 | fb = BaseCausalTreeLearnNode(obj=best_fb_obj, effect=best_fb_effect, p_val=fb_p_val, 124 | node_depth=node.node_depth + 1, 125 | num_samples=y2.shape[0]) 126 | 127 | node.true_branch = self._fit(tb, train_x1, train_y1, train_t1) 128 | node.false_branch = self._fit(fb, train_x2, train_y2, train_t2) 129 | 130 | if node.effect > self.max_effect: 131 | self.max_effect = node.effect 132 | if node.effect < self.min_effect: 133 | self.min_effect = node.effect 134 | 135 | return node 136 | 137 | else: 138 | if node.effect > self.max_effect: 139 | self.max_effect = node.effect 140 | if node.effect < self.min_effect: 141 | self.min_effect = node.effect 142 | 143 | self.num_leaves += 1 144 | node.leaf_num = self.num_leaves 145 | node.is_leaf = True 146 | return node 147 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import numpy as np 4 | from scipy.stats import ttest_ind 5 | import subprocess 6 | import time 7 | 8 | 9 | def check_dir(path): 10 | if not os.path.exists(os.path.dirname(path)): 11 | try: 12 | os.makedirs(os.path.dirname(path)) 13 | except OSError as exc: 14 | if exc.errno != errno.EEXIST: 15 | raise 16 | 17 | 18 | def divide_set(x, y, t, col, value): 19 | idx1 = x[:, col] >= value 20 | idx2 = ~idx1 21 | 22 | x1 = x[idx1] 23 | x2 = x[idx2] 24 | 25 | y1 = y[idx1] 26 | y2 = y[idx2] 27 | 28 | t1 = t[idx1] 29 | t2 = t[idx2] 30 | 31 | return x1, x2, y1, y2, t1, t2 32 | 33 | 34 | def tau_squared(y, t): 35 | total = y.shape[0] 36 | 37 | return_val = (-np.inf, -np.inf) 38 | 39 | if total == 0: 40 | return return_val 41 | 42 | treat_vect = t 43 | 44 | effect = ace(y, treat_vect) 45 | err = (effect ** 2) * total 46 | 47 | return effect 48 | 49 | 50 | def tau_squared_trigger(outcome, treatment, min_size=1, quartile=False): 51 | """Continuous case""" 52 | total = outcome.shape[0] 53 | 54 | return_val = (-np.inf, -np.inf) 55 | 56 | if total == 0: 57 | return return_val 58 | 59 | unique_treatment = np.unique(treatment) 60 | 61 | if unique_treatment.shape[0] == 1: 62 | return return_val 63 | 64 | unique_treatment = (unique_treatment[1:] + unique_treatment[:-1]) / 2 65 | unique_treatment = unique_treatment[1:-1] 66 | 67 | if quartile: 68 | first_quartile = int(np.floor(unique_treatment.shape[0] / 4)) 69 | third_quartile = int(np.ceil(3 * unique_treatment.shape[0] / 4)) 70 | 71 | unique_treatment = 
unique_treatment[first_quartile:third_quartile] 72 | 73 | yy = np.tile(outcome, (unique_treatment.shape[0], 1)) 74 | tt = np.tile(treatment, (unique_treatment.shape[0], 1)) 75 | 76 | x = np.transpose(np.transpose(tt) > unique_treatment) 77 | 78 | tt[x] = 1 79 | tt[np.logical_not(x)] = 0 80 | 81 | treat_num = np.sum(tt == 1, axis=1) 82 | cont_num = np.sum(tt == 0, axis=1) 83 | min_size_idx = np.where(np.logical_and( 84 | treat_num >= min_size, cont_num >= min_size)) 85 | 86 | unique_treatment = unique_treatment[min_size_idx] 87 | tt = tt[min_size_idx] 88 | yy = yy[min_size_idx] 89 | 90 | if tt.shape[0] == 0: 91 | return return_val 92 | 93 | y_t_m = np.sum((yy * (tt == 1)), axis=1) / np.sum(tt == 1, axis=1) 94 | y_c_m = np.sum((yy * (tt == 0)), axis=1) / np.sum(tt == 0, axis=1) 95 | 96 | effect = y_t_m - y_c_m 97 | err = effect ** 2 98 | 99 | max_err = np.argmax(err) 100 | 101 | best_effect = effect[max_err] 102 | best_err = err[max_err] 103 | best_split = unique_treatment[max_err] 104 | 105 | best_err = total * best_err 106 | 107 | return best_effect, best_split 108 | 109 | 110 | def ace(y, t): 111 | treat = t >= 0.5 112 | # control = t == 0 113 | control = ~treat 114 | 115 | yt = y[treat] 116 | yc = y[control] 117 | 118 | mu1 = 0.0 119 | mu0 = 0.0 120 | if yt.shape[0] != 0: 121 | mu1 = np.mean(yt) 122 | if yc.shape[0] != 0: 123 | mu0 = np.mean(yc) 124 | 125 | return mu1 - mu0 126 | 127 | 128 | def ace_trigger(y, t, trigger): 129 | treat = t >= trigger 130 | control = ~treat 131 | 132 | yt = y[treat] 133 | yc = y[control] 134 | 135 | mu1 = 0.0 136 | mu0 = 0.0 137 | if yt.shape[0] != 0: 138 | mu1 = np.mean(yt) 139 | if yc.shape[0] != 0: 140 | mu0 = np.mean(yc) 141 | 142 | return mu1 - mu0 143 | 144 | 145 | def get_pval(y, t): 146 | treat = t == 1 147 | # control = t == 0 148 | control = ~treat 149 | 150 | outcome_trt = y[treat] 151 | outcome_cont = y[control] 152 | 153 | p_val = ttest_ind(outcome_trt, outcome_cont)[1] 154 | 155 | if np.isnan(p_val): 156 | return 0.000 157 | 158 | return p_val 159 | 160 | 161 | def get_pval_trigger(y, t, trigger): 162 | treat = t >= trigger 163 | control = ~treat 164 | 165 | outcome_trt = y[treat] 166 | outcome_cont = y[control] 167 | 168 | p_val = ttest_ind(outcome_trt, outcome_cont)[1] 169 | 170 | if np.isnan(p_val): 171 | return 0.000 172 | 173 | return p_val 174 | 175 | 176 | def min_size_value_bool(min_size, t, trigger=0.5): 177 | nt, nc = get_treat_size(t, trigger=trigger) 178 | 179 | return nt, nc, nt < min_size or nc < min_size 180 | 181 | 182 | def check_min_size(min_size, t, trigger=0.5): 183 | nt, nc = get_treat_size(t, trigger) 184 | 185 | return nt < min_size or nc < min_size 186 | 187 | 188 | def get_treat_size(t, trigger=0.5): 189 | treated = t >= trigger 190 | control = ~treated 191 | num_treatment = t[treated].shape[0] 192 | num_control = t[control].shape[0] 193 | 194 | return num_treatment, num_control 195 | 196 | 197 | def variance(y, t): 198 | treat_vect = t 199 | 200 | treat = treat_vect == 1 201 | # control = treat_vect == 0 202 | control = ~treat 203 | 204 | if y.shape[0] == 0: 205 | return np.array([np.inf, np.inf]) 206 | 207 | yt = y[treat] 208 | yc = y[control] 209 | 210 | if yt.shape[0] == 0: 211 | var_t = np.var(y) 212 | else: 213 | var_t = np.var(yt) 214 | 215 | if yc.shape[0] == 0: 216 | var_c = np.var(y) 217 | else: 218 | var_c = np.var(yc) 219 | 220 | return var_t, var_c 221 | 222 | 223 | def variance_trigger(y, t, trigger): 224 | treat_vect = t 225 | 226 | treat = treat_vect >= trigger 227 | # control = treat_vect == 0 228 | 
control = ~treat 229 | 230 | if y.shape[0] == 0: 231 | return np.array([np.inf, np.inf]) 232 | 233 | yt = y[treat] 234 | yc = y[control] 235 | 236 | if yt.shape[0] == 0: 237 | var_t = np.var(y) 238 | else: 239 | var_t = np.var(yt) 240 | 241 | if yc.shape[0] == 0: 242 | var_c = np.var(y) 243 | else: 244 | var_c = np.var(yc) 245 | 246 | return var_t, var_c 247 | 248 | 249 | def col_dict(names): 250 | feat_names = {} 251 | for i, name in enumerate(names): 252 | column = "Column %s" % i 253 | feat_names[column] = name 254 | return feat_names 255 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util_c.cpython-310-darwin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/lib.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util_c.cpython-310-darwin.so -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/pehe_tree.py: -------------------------------------------------------------------------------- 1 | from CTL._tree import _CausalTree 2 | from CTL.causal_tree.nn_pehe.base import * 3 | from CTL.causal_tree.nn_pehe.val import * 4 | from CTL.causal_tree.nn_pehe.honest import * 5 | from CTL.causal_tree.nn_pehe.balance_split import * 6 | 7 | 8 | class PEHETree(_CausalTree): 9 | 10 | def __init__(self, min_size=2, max_depth=-1, k=1, 11 | val=False, split_size=0.5, 12 | honest=False, 13 | use_propensity=False, propensity_model=None, 14 | balance=False, 15 | seed=724): 16 | super().__init__() 17 | 18 | params = { 19 | "min_size": min_size, 20 | "max_depth": max_depth, 21 | "k": k, 22 | "seed": seed, 23 | "split_size": split_size, 24 | "use_propensity": use_propensity, 25 | "propensity_model": propensity_model 26 | } 27 | if val: 28 | self.tree = ValPEHE(**params) 29 | elif honest: 30 | self.tree = HonestPEHE(**params) 31 | elif balance: 32 | self.tree = BalanceBasePEHE(**params) 33 | else: 34 | self.tree = BasePEHE(**params) 35 | 36 | self.column_num = 0 37 | self.fitted = False 38 | self.tree_depth = 0 39 | 40 | self.obj = 0 41 | self.pehe = 0 42 | 43 | def fit(self, x, y, t): 44 | self.column_num = x.shape[1] 45 | x = x.astype(np.float64) 46 | y = y.astype(np.float64) 47 | t = t.astype(np.float64) 48 | self.tree.fit(x, y, t) 49 | self.fitted = True 50 | self.tree_depth = self.tree.tree_depth 51 | self.obj = self.tree.obj 52 | self.pehe = self.tree.pehe 53 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/sig_diff_tree.py: -------------------------------------------------------------------------------- 1 | from CTL._tree import _CausalTree 2 | from CTL.causal_tree.sig_diff.sig_base import SigTreeBase 3 | from CTL.causal_tree.sig_diff.sig_val import SigTreeVal 4 | import numpy as np 5 | 6 | 7 | class SigDiffTree(_CausalTree): 8 | 9 | def __init__(self, alpha=0.05, min_size=2, max_depth=-1, val=False, split_size=0.5, seed=724): 10 | super().__init__() 11 | 12 | params = { 13 | "alpha": alpha, 14 | "min_size": min_size, 15 | "max_depth": max_depth, 16 | "seed": seed, 17 | } 18 | if val: 19 | params["split_size"] = split_size 20 | self.tree = SigTreeVal(**params) 21 | else: 22 | self.tree = SigTreeBase(**params) 23 | 24 | self.column_num = 0 25 | self.fitted = False 26 | self.tree_depth = 0 27 | 28 | self.obj = 0 29 | 30 | def fit(self, x, y, t): 
31 | self.column_num = x.shape[1] 32 | x = x.astype(np.float64) 33 | y = y.astype(np.float64) 34 | t = t.astype(np.float64) 35 | self.tree.fit(x, y, t) 36 | self.fitted = True 37 | self.tree_depth = self.tree.tree_depth 38 | self.obj = self.tree.obj 39 | -------------------------------------------------------------------------------- /build/lib.macosx-12.6-arm64-cpython-310/CTL/tree.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class Node(ABC): 5 | 6 | def __init__(self): 7 | self.is_leaf = False 8 | 9 | 10 | class Tree(ABC): 11 | 12 | def __init__(self): 13 | pass 14 | 15 | @abstractmethod 16 | def fit(self, x, y, t): 17 | pass 18 | 19 | @abstractmethod 20 | def predict(self, x): 21 | pass 22 | -------------------------------------------------------------------------------- /build/temp.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util_c.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/build/temp.macosx-12.6-arm64-cpython-310/CTL/causal_tree/util_c.o -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: causal-tree-learn 3 | Version: 2.43 4 | Summary: Python implementation of causal trees with validation 5 | Home-page: https://github.com/edgeslab/CTL 6 | Author: Christopher Tran 7 | Author-email: ctran29@uic.edu 8 | Classifier: Programming Language :: Python :: 3 9 | Classifier: License :: OSI Approved :: MIT License 10 | Classifier: Operating System :: OS Independent 11 | Requires-Python: >=3.6 12 | Description-Content-Type: text/markdown 13 | License-File: LICENSE 14 | 15 | # CTL 16 | 17 | Christopher Tran, Elena Zheleva, ["Learning Triggers for Heterogeneous Treatment Effects", AAAI 2019.](https://arxiv.org/pdf/1902.00087.pdf) 18 | 19 | Our method is based on and adapted from: https://github.com/susanathey/causalTree 20 | 21 | 22 | ## Requirements 23 | * Python 3 24 | * scikit-learn 25 | * scipy 26 | * graphviz (if you want to plot the tree) 27 | 28 | ## Installation 29 | 30 | Install through pip: 31 | 32 | ```bash 33 | pip install causal_tree_learn 34 | ``` 35 | 36 | or clone the repository and build the extension in place: 37 | ```bash 38 | python setup.py build_ext --inplace 39 | ``` 40 | 41 | ## Demo Code 42 | 43 | Two demo scripts are available:
44 | 45 | ```bash 46 | python binary_example.py 47 | ``` 48 | Runs the tree on a binary-treatment example (asthma.txt). 49 | 50 | ```bash 51 | python trigger_example.py 52 | ``` 53 | Runs a tree on a trigger problem, where the treatment is continuous. (Note: the example data are currently synthetic and the treatment does not affect the outcome; the script only demonstrates the API.) 54 | -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | README.md 3 | pyproject.toml 4 | setup.py 5 | CTL/__init__.py 6 | CTL/_tree.py 7 | CTL/causal_learn_forest.py 8 | CTL/causal_tree_learn.py 9 | CTL/causal_tree_match.py 10 | CTL/pehe_tree.py 11 | CTL/sig_diff_tree.py 12 | CTL/tree.py 13 | CTL/causal_tree/__init__.py 14 | CTL/causal_tree/ct.py 15 | CTL/causal_tree/util.py 16 | CTL/causal_tree/util_c.c 17 | CTL/causal_tree/util_c.pyx 18 | CTL/causal_tree/ctl/__init__.py 19 | CTL/causal_tree/ctl/adaptive.py 20 | CTL/causal_tree/ctl/binary_ctl.py 21 | CTL/causal_tree/ctl/ctl_base.py 22 | CTL/causal_tree/ctl/ctl_honest.py 23 | CTL/causal_tree/ctl/ctl_val_honest.py 24 | CTL/causal_tree/ctl/honest.py 25 | CTL/causal_tree/ctl_match/__init__.py 26 | CTL/causal_tree/ctl_match/binary_ctl.py 27 | CTL/causal_tree/ctl_match/ctl_base.py 28 | CTL/causal_tree/ctl_trigger/__init__.py 29 | CTL/causal_tree/ctl_trigger/adaptive_trigger.py 30 | CTL/causal_tree/ctl_trigger/ctl_base_trigger.py 31 | CTL/causal_tree/ctl_trigger/ctl_honest_trigger.py 32 | CTL/causal_tree/ctl_trigger/ctl_val_honest_trigger.py 33 | CTL/causal_tree/ctl_trigger/honest_trigger.py 34 | CTL/causal_tree/ctl_trigger/trigger_ctl.py 35 | CTL/causal_tree/nn_pehe/__init__.py 36 | CTL/causal_tree/nn_pehe/balance_split.py 37 | CTL/causal_tree/nn_pehe/base.py 38 | CTL/causal_tree/nn_pehe/honest.py 39 | CTL/causal_tree/nn_pehe/tree.py 40 | CTL/causal_tree/nn_pehe/val.py 41 | CTL/causal_tree/r_tree/__init__.py 42 | CTL/causal_tree/r_tree/base.py 43 | CTL/causal_tree/r_tree/tree.py 44 | CTL/causal_tree/sig_diff/__init__.py 45 | CTL/causal_tree/sig_diff/sig.py 46 | CTL/causal_tree/sig_diff/sig_base.py 47 | CTL/causal_tree/sig_diff/sig_val.py 48 | causal_tree_learn.egg-info/PKG-INFO 49 | causal_tree_learn.egg-info/SOURCES.txt 50 | causal_tree_learn.egg-info/dependency_links.txt 51 | causal_tree_learn.egg-info/requires.txt 52 | causal_tree_learn.egg-info/top_level.txt -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scikit-learn 3 | scipy 4 | -------------------------------------------------------------------------------- /causal_tree_learn.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | CTL 2 | -------------------------------------------------------------------------------- /dist/causal-tree-learn-2.43.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/dist/causal-tree-learn-2.43.tar.gz --------------------------------------------------------------------------------
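To complement the demo scripts listed in the README above, here is a minimal sketch of the binary-treatment workflow. It assumes `binary_example.py` uses the same `CausalTree` fit/predict API as `trigger_example.py` (reproduced at the end of this dump); the synthetic arrays and the default constructor arguments are assumptions standing in for the asthma.txt data.

```python
# A hedged sketch of the binary-treatment API, not a copy of binary_example.py.
from CTL.causal_tree_learn import CausalTree
from sklearn.model_selection import train_test_split
import numpy as np

np.random.seed(0)

x = np.random.randn(200, 10)            # features
t = np.random.randint(0, 2, 200)        # binary treatment assignment (0/1)
y = np.random.randn(200) + 0.5 * t      # outcome with a small additive effect

x_train, x_test, y_train, y_test, t_train, t_test = train_test_split(
    x, y, t, test_size=0.5, random_state=42)

ct = CausalTree()                       # default (adaptive) settings assumed
ct.fit(x_train, y_train, t_train)
effects = ct.predict(x_test)            # estimated treatment effect per example
print(effects[:5])
```

As in the trigger demo, `honest=True` or `val_honest=True` can presumably be passed to the constructor to get the honest variants; only the `cont=True` flag is specific to the continuous-treatment case.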
/dist/causal_tree_learn-2.43-cp310-cp310-macosx_12_0_arm64.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edgeslab/CTL/63a9ea00ac9eaa0611eb796189b4956c1b3a01f9/dist/causal_tree_learn-2.43-cp310-cp310-macosx_12_0_arm64.whl -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "causal-tree-learn" 3 | version = "2.42" 4 | description = "" 5 | authors = ["Christopher Tran <ctran29@uic.edu>"] 6 | license = "License :: OSI Approved :: MIT License" 7 | 8 | [tool.poetry.dependencies] 9 | python = ">=3.8,<3.12" 10 | numpy = "^1.23.3" 11 | scikit-learn = "^1.1.2" 12 | scipy = "^1.9.2" 13 | Cython = "^0.29.32" 14 | twine = "^4.0.1" 15 | 16 | [tool.poetry.dev-dependencies] 17 | 18 | [build-system] 19 | requires = ["poetry-core>=1.0.0"] 20 | build-backend = "poetry.core.masonry.api" 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # from setuptools import setup 2 | from setuptools import find_packages 3 | from distutils.core import setup 4 | from distutils.extension import Extension 5 | import numpy as np 6 | 7 | # guard the Cython imports so the fallback build from the shipped .c file works 8 | try: 9 | from Cython.Distutils import build_ext 10 | from Cython.Build import cythonize 11 | except ImportError: 12 | use_cython = False 13 | else: 14 | use_cython = True 15 | 16 | with open("README.md", "r") as fh: 17 | long_description = fh.read() 18 | 19 | cmdclass = {} 20 | ext_modules = [] 21 | 22 | if use_cython: 23 | ext_modules = [ 24 | Extension(name="CTL.causal_tree.util_c", sources=["CTL/causal_tree/util_c.pyx"], 25 | include_dirs=[np.get_include(), "."]), 26 | ] 27 | cmdclass.update({'build_ext': build_ext}) 28 | else: 29 | # ext_modules = [ 30 | # Extension(name="CTL.causal_tree.util_c", sources=["CTL/causal_tree/util_c.pyx", "CTL/causal_tree/util_c.c"], 31 | # include_dirs=[np.get_include(), "."]), 32 | # ] 33 | ext_modules = [ 34 | Extension(name="CTL.causal_tree.util_c", sources=["CTL/causal_tree/util_c.c"], 35 | include_dirs=[np.get_include(), "."]), 36 | ] 37 | 38 | 39 | setup( 40 | name="causal-tree-learn", 41 | version="2.43", 42 | author="Christopher Tran", 43 | author_email="ctran29@uic.edu", 44 | description="Python implementation of causal trees with validation", 45 | long_description=long_description, 46 | long_description_content_type="text/markdown", 47 | url="https://github.com/edgeslab/CTL", 48 | packages=find_packages(), 49 | classifiers=[ 50 | "Programming Language :: Python :: 3", 51 | "License :: OSI Approved :: MIT License", 52 | "Operating System :: OS Independent", 53 | ], 54 | install_requires=['numpy', 55 | 'scikit-learn', 56 | 'scipy' 57 | ], 58 | python_requires='>=3.6', 59 | ext_modules=cythonize(ext_modules) if use_cython else ext_modules, 60 | # cmdclass={'build_ext': build_ext}, 61 | cmdclass=cmdclass, 62 | setup_requires=["cython", "numpy"], 63 | package_data={"CTL.causal_tree": ["util_c.c", "util_c.pyx"]} 64 | ) 65 | -------------------------------------------------------------------------------- /trigger_example.py: -------------------------------------------------------------------------------- 1 | from CTL.causal_tree_learn import CausalTree 2 | from sklearn.model_selection import train_test_split 3 | import numpy as np 4 | 5 | np.random.seed(0) 6 | 7 | x = np.random.randn(100, 10) 8 | y = 
np.random.randn(100) 9 | treatment = np.random.randn(100) 10 | 11 | x_train, x_test, y_train, y_test, treat_train, treat_test = train_test_split(x, y, treatment, 12 | test_size=0.5, random_state=42) 13 | 14 | variable_names = [] 15 | for i in range(x.shape[1]): 16 | variable_names.append(f"Column {i}") 17 | 18 | # regular CTL 19 | ctl = CausalTree(cont=True) 20 | ctl.fit(x_train, y_train, treat_train) 21 | ctl_predict = ctl.predict(x_test) 22 | 23 | # honest CTL 24 | cth = CausalTree(cont=True, honest=True) 25 | cth.fit(x_train, y_train, treat_train) 26 | cth_predict = cth.predict(x_test) 27 | 28 | # val honest CTL 29 | cthv = CausalTree(cont=True, val_honest=True) 30 | cthv.fit(x_train, y_train, treat_train) 31 | cthv_predict = cthv.predict(x_test) 32 | 33 | # adaptive CT 34 | ct_adaptive = CausalTree(weight=0.0, split_size=0.0, cont=True) 35 | ct_adaptive.fit(x_train, y_train, treat_train) 36 | ct_adaptive_predict = ct_adaptive.predict(x_test) 37 | 38 | # honest CT 39 | ct_honest = CausalTree(honest=True, weight=0.0, split_size=0.0, cont=True) 40 | ct_honest.fit(x_train, y_train, treat_train) 41 | ct_honest_predict = ct_honest.predict(x_test) 42 | 43 | # to get which examples are in which leaf 44 | groups = cthv.get_groups(x_test) 45 | 46 | # to get triggers 47 | triggers = cthv.get_triggers(x_test) 48 | print(triggers) 49 | 50 | # to get features used, input the columns 51 | features_used = cthv.get_variables_used(variable_names) 52 | print(features_used) 53 | 54 | # to get the decision for every example 55 | features = cthv.get_features(x) 56 | print(features) 57 | 58 | # if you want to plot a tree 59 | cthv.plot_tree(filename="output/trigger_tree") 60 | 61 | 62 | --------------------------------------------------------------------------------
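A closing usage note: the tree classes expose the `save(filename)` method shown in the pruning code earlier in this dump, which pickles the entire fitted object (creating the output directory via `check_dir`). Loading back is plain pickle. The sketch below continues from `trigger_example.py` above; the `.pkl` filename is illustrative, not one the repo itself creates.

```python
# Round-tripping a fitted tree via pickle, continuing from trigger_example.py.
# save() pickles the whole object, so the standard pickle.load recovers it.
import pickle

cthv.save("output/trigger_tree.pkl")   # creates output/ if it does not exist

with open("output/trigger_tree.pkl", "rb") as f:
    loaded = pickle.load(f)

# the restored tree should predict identically to the original
print(np.allclose(loaded.predict(x_test), cthv.predict(x_test)))
```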