├── .gitignore ├── LICENSE ├── README.md ├── demos.py ├── mliv ├── __init__.py ├── dataset │ ├── __init__.py │ └── demand │ │ ├── __init__.py │ │ └── demand_v1.py ├── inference │ ├── __init__.py │ ├── deep │ │ ├── __init__.py │ │ ├── deepiv_v1.py │ │ └── dfiv_v1.py │ ├── dflearning │ │ ├── __init__.py │ │ └── dfl_v1.py │ ├── gmm │ │ ├── __init__.py │ │ ├── agmm_v1 │ │ │ ├── __init__.py │ │ │ ├── net.py │ │ │ ├── oadam.py │ │ │ ├── rbflayer.py │ │ │ └── trainer.py │ │ └── deepgmm_v1 │ │ │ ├── __init__.py │ │ │ ├── dataclass.py │ │ │ ├── model.py │ │ │ ├── nn_structure │ │ │ ├── __init__.py │ │ │ ├── nn_structure_for_demand_image.py │ │ │ ├── nn_structure_for_demand_old.py │ │ │ ├── nn_structure_for_dsprite.py │ │ │ └── nn_structure_for_sin.py │ │ │ ├── trainer.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── custom_logging.py │ │ │ └── pytorch_linear_reg_utils.py │ ├── onestage │ │ ├── __init__.py │ │ └── onesiv_v1.py │ ├── sieve │ │ ├── __init__.py │ │ ├── dualiv_v1.py │ │ └── kerneliv_v1.py │ └── twosls │ │ ├── __init__.py │ │ ├── nn2sls_v1.py │ │ ├── poly2sls_v1.py │ │ └── vanilla2sls_v1.py └── utils │ ├── __init__.py │ ├── loaddata.py │ └── setenv.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # about ignore 2 | *Results/ 3 | *Result/ 4 | *results/ 5 | *result/ 6 | *Data/ 7 | *data/ 8 | *run/ 9 | *ckpts/ 10 | *build/ 11 | *dist/ 12 | *mliv.egg-info/ 13 | *cache/ 14 | *miniImageNet_WRN_60Epoch/ 15 | 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | 21 | # C extensions 22 | *.so 23 | 24 | # Distribution / packaging 25 | .Python 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | pip-wheel-metadata/ 39 | share/python-wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | MANIFEST 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .nox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | *.py,cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | local_settings.py 76 | db.sqlite3 77 | db.sqlite3-journal 78 | 79 | # Flask stuff: 80 | instance/ 81 | .webassets-cache 82 | 83 | # Scrapy stuff: 84 | .scrapy 85 | 86 | # Sphinx documentation 87 | docs/_build/ 88 | 89 | # PyBuilder 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | .python-version 101 | 102 | # pipenv 103 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 104 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 105 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 106 | # install all needed dependencies. 107 | #Pipfile.lock 108 | 109 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 110 | __pypackages__/ 111 | 112 | # Celery stuff 113 | celerybeat-schedule 114 | celerybeat.pid 115 | 116 | # SageMath parsed files 117 | *.sage.py 118 | 119 | # Environments 120 | .env 121 | .venv 122 | env/ 123 | venv/ 124 | ENV/ 125 | env.bak/ 126 | venv.bak/ 127 | 128 | # Spyder project settings 129 | .spyderproject 130 | .spyproject 131 | 132 | # Rope project settings 133 | .ropeproject 134 | 135 | # mkdocs documentation 136 | /site 137 | 138 | # mypy 139 | .mypy_cache/ 140 | .dmypy.json 141 | dmypy.json 142 | 143 | # Pyre type checker 144 | .pyre/ 145 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 causal-machine-learning-lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mliv
2 | 
3 | Machine-learning estimators for instrumental-variable (IV) regression that share a common `config` / `fit` / `predict` interface.
4 | ```python
5 | from mliv.dataset.demand import gen_data
6 | from mliv.utils import CausalDataset
7 | gen_data()
8 | data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')
9 | 
10 | from mliv.inference import Vanilla2SLS
11 | from mliv.inference import Poly2SLS
12 | from mliv.inference import NN2SLS
13 | from mliv.inference import OneSIV
14 | from mliv.inference import KernelIV
15 | from mliv.inference import DualIV
16 | from mliv.inference import DFL
17 | from mliv.inference import AGMM
18 | from mliv.inference import DeepGMM
19 | from mliv.inference import DFIV
20 | from mliv.inference import DeepIV # Tensorflow & keras
21 | 
22 | for mod in [OneSIV,KernelIV,DualIV,DFL,AGMM,DeepGMM,DFIV,Vanilla2SLS,Poly2SLS,NN2SLS]:
23 |     model = mod()
24 |     model.config['num'] = 100
25 |     model.config['epochs'] = 10
26 |     model.fit(data)
27 | 
28 |     print(mod)
29 | 
30 | 
31 | try:
32 |     model = DeepIV()
33 |     model.config['num'] = 100
34 |     model.config['epochs'] = 10
35 |     model.fit(data)
36 | 
37 |     print(DeepIV)
38 | except Exception:
39 |     print('Error: DeepIV is implemented in TensorFlow/Keras and requires a working installation.')
40 | 
41 | 
42 | ```
--------------------------------------------------------------------------------
/demos.py:
--------------------------------------------------------------------------------
1 | from mliv.dataset.demand import gen_data
2 | from mliv.utils import CausalDataset
3 | gen_data()
4 | data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')
5 | 
6 | from mliv.inference import Vanilla2SLS
7 | from mliv.inference import Poly2SLS
8 | from mliv.inference import NN2SLS
9 | from mliv.inference import OneSIV
10 | from mliv.inference import KernelIV
11 | from mliv.inference import DualIV
12 | from mliv.inference import DFL
13 | from mliv.inference import AGMM
14 | from mliv.inference import DeepGMM
15 | from mliv.inference import DFIV
16 | try:
17 |     from mliv.inference import DeepIV
18 | except Exception:
19 |     pass
20 | 
21 | for mod in [OneSIV,KernelIV,DualIV,DFL,AGMM,DeepGMM,DFIV,Vanilla2SLS,Poly2SLS,NN2SLS]:
22 | 
23 |     try:
24 |         model = mod()
25 |         model.config['num'] = 100
26 |         model.config['epochs'] = 10
27 |         model.fit(data)
28 | 
29 |         print(mod)
30 |     except Exception:
31 |         print(f'Error: {mod} failed to run.')
32 | 
33 | try:
34 |     model = DeepIV()
35 |     model.config['num'] = 100
36 |     model.config['epochs'] = 10
37 |     model.fit(data)
38 | 
39 |     print(DeepIV)
40 | except Exception:
41 |     print('Error: DeepIV is implemented in TensorFlow/Keras and requires a working installation.')
42 | 
--------------------------------------------------------------------------------
/mliv/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) causal-machine-learning-lab. All rights reserved.
2 | # Licensed under the MIT License.
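# All estimators exposed by mliv.inference share one workflow (see README.md and
# demos.py): instantiate, adjust `model.config`, then call `fit(data)`.
# A minimal sketch, assuming the Demand data generated as in the README:
#
#     from mliv.dataset.demand import gen_data
#     from mliv.utils import CausalDataset
#     from mliv.inference import DFIV
#
#     gen_data()
#     data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')
#     model = DFIV()
#     model.config['epochs'] = 10
#     model.fit(data)
#     ATE, _ = model.ATE(data.train)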
3 | 
4 | name = "mliv"
5 | __version__ = "0.0.1"
6 | __all__ = [
7 |     "dataset",
8 |     "inference",
9 |     "utils",
10 | ]
--------------------------------------------------------------------------------
/mliv/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/causal-machine-learning-lab/mliv/58c45ae08888b97d470a1837205cf35bc34b03d2/mliv/dataset/__init__.py
--------------------------------------------------------------------------------
/mliv/dataset/demand/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | from .demand_v1 import generate_Demand_train, generate_Demand_test, set_Configuration, config
4 | 
5 | example = '''
6 | from mliv.dataset.demand import gen_data
7 | from mliv.utils import CausalDataset
8 | gen_data()
9 | data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')
10 | '''
11 | 
12 | def gen_data(config=config):
13 |     config, config_trt, config_val, config_tst = set_Configuration(config)
14 |     exps = config['exps']
15 |     dataName = config['dataName']
16 |     path = './Data/{}/{}_{}_{}_{}/'.format(config['dataName'],config['rho'],config['alpha'],config['beta'],config['num'])
17 |     print(f'The path: {path}')
18 | 
19 |     for exp in range(exps):
20 | 
21 |         print(f'Generate {dataName} datasets - {exp}/{exps}. ')
22 | 
23 |         # derive per-experiment seeds from the base seeds in `config`
24 |         config_trt['seed'], config_val['seed'], config_tst['seed'] = config['seed'] + exp*333, config['seed_val'] + exp*444, config['seed_tst'] + exp*555
25 |         train = generate_Demand_train(**config_trt)
26 |         valid = generate_Demand_train(**config_val)
27 |         test = generate_Demand_test(**config_tst)
28 | 
29 |         data_path = path + '{}/'.format(exp)
30 |         os.makedirs(os.path.dirname(data_path), exist_ok=True)
31 | 
32 |         train.to_csv(data_path + 'train.csv', index=False)
33 |         valid.to_csv(data_path + 'valid.csv', index=False)
34 |         test.to_csv(data_path + 'test.csv', index=False)
35 | 
36 |         configs = {'config':config, 'config_trt':config_trt, 'config_val':config_val, 'config_tst':config_tst}
37 |         with open(data_path + "configs.json", "w") as file:
38 |             file.write( json.dumps(configs) )
39 | 
40 |     return config
--------------------------------------------------------------------------------
/mliv/dataset/demand/demand_v1.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | import numpy as np
3 | from numpy.random import default_rng
4 | from pandas import DataFrame
5 | import os
6 | 
7 | np.random.seed(42)
8 | 
9 | example = '''
10 | from mliv.dataset.demand.demand_v1 import generate_Demand_train, generate_Demand_test, set_Configuration
11 | 
12 | config, config_trt, config_val, config_tst = set_Configuration()
13 | 
14 | train = generate_Demand_train(**config_trt)
15 | valid = generate_Demand_train(**config_val)
16 | test = generate_Demand_test(**config_tst)
17 | '''
18 | 
19 | config = {
20 |     'dataName': 'Demand',
21 |     'exps': 10,
22 |     'num': 10000,
23 |     'rho': 0.5,
24 |     'alpha': 1.0,
25 |     'beta': 0.0,
26 |     'seed': 2022,
27 |     'num_val': 10000,
28 |     'seed_val': 3033,
29 |     'seed_tst': 4044
30 | }
31 | 
32 | def set_Configuration(config=config):
33 |     config_trt = {}
34 |     keys_trt = ['num', 'rho', 'alpha', 'beta', 'seed']
35 |     for key in keys_trt:
36 |         config_trt[key] = config[key]
37 | 
38 |     config_val = {}
39 |     keys_val = ['rho', 'alpha', 'beta']
40 |     for key in keys_val:
41 |         config_val[key] = config[key]
42 |     config_val['num'] = config['num_val']
43 |     config_val['seed'] = config['seed_val']
44 | 
45 |     config_tst = {}
46 |     keys_tst = ['rho', 'alpha', 'beta']
47 |     for key in keys_tst:
48 |         config_tst[key] = config[key]
49 |     config_tst['seed'] = config['seed_tst']
50 | 
51 |     return config, config_trt, config_val, config_tst
52 | 
53 | def h(t):
54 |     return 2 * ((t - 5) ** 4 / 600 + np.exp(-4 * (t - 5) ** 2) + t / 10 - 2)
55 | 
56 | def f(p, t, s):
57 |     return 100 + (10 + p) * s * h(t) - 2 * p
58 | 
59 | def generate_Demand_train(num=10000, rho=0.5, alpha=1, beta=0, seed=2021):
60 | 
61 |     rng = default_rng(seed)
62 | 
63 |     emotion = rng.choice(list(range(1, 8)), (num,1))
64 |     time = rng.uniform(0, 10, (num,1))
65 |     cost = rng.normal(0, 1.0, (num,1))
66 |     noise_price = rng.normal(0, 1.0, (num,1))
67 |     noise_demand = rho * noise_price + rng.normal(0, np.sqrt(1 - rho ** 2), (num,1))
68 |     price = 25 + (alpha * cost + 3) * h(time) + beta * cost + noise_price
69 |     structural = f(price, time, emotion).astype(float)
70 |     outcome = (structural + noise_demand).astype(float)
71 | 
72 |     mu0 = f(price-price, time, emotion).astype(float)  # structural outcome at price 0
73 |     mut = structural
74 | 
75 |     numpys = [noise_price,noise_demand, cost, time, emotion, time, emotion, price, mu0, mut, structural, outcome]
76 | 
77 |     train_data = DataFrame(np.concatenate(numpys, axis=1),
78 |                            columns=['u1','u2','z1','x1','x2','c1','a1','t1','m0','mt','g1','y1'])
79 | 
80 |     return train_data
81 | 
82 | def generate_Demand_test(rho=0.5, alpha=1, beta=0, seed=2021):
83 | 
84 |     rng = default_rng(seed)
85 | 
86 |     noise_price = rng.normal(0, 1.0, (2800,1))  # 2800 = 20 cost x 20 time x 7 emotion grid points
87 |     noise_demand = rho * noise_price + rng.normal(0, np.sqrt(1 - rho ** 2), (2800,1))
88 | 
89 |     cost = np.linspace(-1.0, 1.0, 20)
90 |     time = np.linspace(0.0, 10, 20)
91 |     emotion = np.array([1, 2, 3, 4, 5, 6, 7])
92 | 
93 |     data = []
94 |     price_z = []
95 |     for c, t, s in product(cost, time, emotion):
96 |         data.append([c, t, s])
97 |         price_z.append(25 + (alpha * c + 3) * h(t) + beta * c)
98 |     features = np.array(data)
99 |     price_z = np.array(price_z)[:, np.newaxis]
100 |     price = price_z + noise_price
101 | 
102 |     structural = f(price, features[:,1:2], features[:,2:3]).astype(float)
103 |     outcome = (structural + noise_demand).astype(float)
104 | 
105 |     mu0 = f(price-price, features[:,1:2], features[:,2:3]).astype(float)  # structural outcome at price 0
106 |     mut = structural
107 | 
108 |     numpys = [noise_price, noise_demand, features, features[:,1:3], price, mu0, mut, structural, outcome]
109 | 
110 |     test_data = DataFrame(np.concatenate(numpys, axis=1),
111 |                           columns=['u1','u2','z1','x1','x2','c1','a1','t1','m0','mt','g1','y1'])
112 | 
113 |     return test_data
--------------------------------------------------------------------------------
/mliv/inference/__init__.py:
--------------------------------------------------------------------------------
1 | from .twosls import Vanilla2SLS, Poly2SLS, NN2SLS
2 | from .dflearning import DFL
3 | from .onestage import OneSIV
4 | from .sieve import KernelIV, DualIV
5 | from .gmm import AGMM, DeepGMM
6 | from .deep import DFIV
7 | try:
8 |     from .deep import DeepIV
9 | except Exception:
10 |     pass
--------------------------------------------------------------------------------
/mliv/inference/deep/__init__.py:
--------------------------------------------------------------------------------
1 | from .dfiv_v1 import DFIV
2 | try:
3 |     from .deepiv_v1 import DeepIV
4 | except Exception:
5 |     pass
--------------------------------------------------------------------------------
/mliv/inference/deep/deepiv_v1.py:
--------------------------------------------------------------------------------
1 | # from __future__ import absolute_import, division, print_function, unicode_literals
2 | 
3 | import keras
4 | import types
5 | import random
6 | import warnings
7 | import numpy as np
8 | import tensorflow as tf
9 | from mliv.utils import set_seed
10 | from keras import backend as K
11 | from keras.layers import Input, Dense, Dropout, Lambda
12 | from keras.models import Model
13 | from keras.layers.merge import Concatenate
14 | from keras.backend import clear_session
15 | from keras.optimizers import clip_norm  # used by get_gradients below (ships with standalone Keras 2.x)
16 | from keras.constraints import maxnorm
17 | from sklearn.decomposition import PCA
18 | from sklearn import linear_model
19 | 
20 | 
21 | if K.backend() == "theano":
22 |     from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
23 |     import theano; import theano.tensor as tensor
24 |     _FLOATX = theano.config.floatX
25 |     Lop = tensor.Lop
26 | elif K.backend() == "tensorflow":
27 |     def Lop(output, wrt, eval_points):
28 |         grads = tf.gradients(output, wrt, grad_ys=eval_points)
29 |         return grads
30 | 
31 | example = '''
32 | from mliv.inference import DeepIV
33 | 
34 | model = DeepIV()
35 | model.fit(data)
36 | ITE = model.predict(data.train)
37 | ATE,_ = model.ATE(data.train)
38 | '''
39 | 
40 | ############ from DeepIV_True.custom_gradients import replace_gradients_mse
41 | 
42 | def get_gradients(self, loss, params):
43 |     '''
44 |     Replacement for the default keras get_gradients() function.
45 |     Modification: checks if the object has the attribute grads and
46 |     returns that rather than calculating the gradients using automatic
47 |     differentiation.
48 |     '''
49 |     if hasattr(self, 'grads'):
50 |         grads = self.grads
51 |     else:
52 |         grads = K.gradients(loss, params)
53 |     if hasattr(self, 'clipnorm') and self.clipnorm > 0:
54 |         norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
55 |         grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
56 |     if hasattr(self, 'clipvalue') and self.clipvalue > 0:
57 |         grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
58 |     return grads
59 | 
60 | def replace_gradients_mse(model, opt, batch_size, n_samples = 1):
61 |     '''
62 |     Replace the gradients of a Keras model with mean square error loss.
63 |     '''
64 |     # targets has been repeated twice so the below creates two identical columns
65 |     # of the target values - we'll only use the first column.
66 |     targets = K.reshape(model.targets[0], (batch_size, n_samples * 2))
67 |     output = K.mean(K.reshape(model.outputs[0], (batch_size, n_samples, 2)), axis=1)
68 |     # compute d Loss / d output
69 |     dL_dOutput = (output[:,0] - targets[:,0]) * (2.) / batch_size
70 |     # compute (d Loss / d output) (d output / d theta) for each theta
71 |     trainable_weights = model.trainable_weights
72 |     grads = Lop(output[:,1], wrt=trainable_weights, eval_points=dL_dOutput)
73 |     # compute regularizer gradients
74 | 
75 |     # add loss with respect to regularizers
76 |     reg_loss = model.total_loss * 0.
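    # `model.total_loss * 0.` builds a zero tensor in the same graph and dtype as
    # the loss; the loop below accumulates the model's regularization terms onto
    # it so their gradients can be added to the custom MSE gradients.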
77 |     for r in model.losses:
78 |         reg_loss += r
79 |     reg_grads = K.gradients(reg_loss, trainable_weights)
80 |     grads = [g+r for g,r in zip(grads, reg_grads)]
81 | 
82 |     opt = keras.optimizers.get(opt)
83 |     # Patch keras gradient calculation to allow for user defined gradients
84 |     opt.get_gradients = types.MethodType( get_gradients, opt )
85 |     opt.grads = grads
86 |     model.optimizer = opt
87 |     return model
88 | 
89 | ######### import DeepIV_True.densities as densities
90 | 
91 | def split(start, stop):
92 |     return Lambda(lambda x: x[:, start:stop], output_shape=(None, stop-start))
93 | 
94 | def split_mixture_of_gaussians(x, n_components):
95 |     pi = split(0, n_components)(x)
96 |     mu = split(n_components, 2*n_components)(x)
97 |     log_sig = split(2*n_components, 3*n_components)(x)
98 |     return pi, mu, log_sig
99 | 
100 | def log_norm_pdf(x, mu, log_sig):
101 |     z = (x - mu) / (K.exp(K.clip(log_sig, -40, 40))) #TODO: get rid of this clipping
102 |     return -(0.5)*K.log(2*np.pi) - log_sig - 0.5*((z)**2)
103 | 
104 | def mix_gaussian_loss(x, mu, log_sig, w):
105 |     '''
106 |     Combine the mixture of gaussian distribution and the loss into a single function
107 |     so that we can do the log sum exp trick for numerical stability...
108 |     '''
109 |     if K.backend() == "tensorflow":
110 |         x.set_shape([None, 1])
111 |     gauss = log_norm_pdf(K.repeat_elements(x=x, rep=mu.shape[1], axis=1), mu, log_sig)
112 |     # TODO: get rid of clipping.
113 |     gauss = K.clip(gauss, -40, 40)
114 |     max_gauss = K.maximum((0.), K.max(gauss))
115 |     # log sum exp trick...
116 |     gauss = gauss - max_gauss
117 |     out = K.sum(w * K.exp(gauss), axis=1)
118 |     loss = K.mean(-K.log(out) + max_gauss)
119 |     return loss
120 | 
121 | def mixture_of_gaussian_output(x, n_components):
122 |     mu = keras.layers.Dense(n_components, activation='linear')(x)
123 |     log_sig = keras.layers.Dense(n_components, activation='linear')(x)
124 |     pi = keras.layers.Dense(n_components, activation='softmax')(x)
125 |     return Concatenate(axis=1)([pi, mu, log_sig])
126 | 
127 | def mixture_of_gaussian_loss(y_true, y_pred, n_components):
128 |     pi, mu, log_sig = split_mixture_of_gaussians(y_pred, n_components)
129 |     return mix_gaussian_loss(y_true, mu, log_sig, pi)
130 | 
131 | ######### import DeepIV_True.samplers as samplers
132 | 
133 | def random_laplace(shape, mu=0., b=1.):
134 |     '''
135 |     Draw random samples from a Laplace distribution.
136 | 
137 |     See: https://en.wikipedia.org/wiki/Laplace_distribution#Generating_random_variables_according_to_the_Laplace_distribution
138 |     '''
139 |     U = K.random_uniform(shape, -0.5, 0.5)
140 |     return mu - b * K.sign(U) * K.log(1 - 2 * K.abs(U))
141 | 
142 | def random_normal(shape, mean=0.0, std=1.0):
143 |     return K.random_normal(shape, mean, std)
144 | 
145 | def random_multinomial(logits, seed=None):
146 |     '''
147 |     Backend-agnostic sampling from a multinomial with probabilities given by `logits`
148 |     '''
149 |     if K.backend() == "theano":
150 |         if seed is None:
151 |             seed = np.random.randint(1, 10e6)
152 |         rng = RandomStreams(seed=seed)
153 |         return rng.multinomial(n=1, pvals=logits, ndim=None, dtype=_FLOATX)
154 |     elif K.backend() == "tensorflow":
155 |         return tf.one_hot(tf.squeeze(tf.multinomial(K.log(logits), num_samples=1)),
156 |                           int(logits.shape[1]))
157 | 
158 | def random_gmm(pi, mu, sig):
159 |     '''
160 |     Sample from a gaussian mixture model. Returns one sample for each row in
161 |     the pi, mu and sig matrices... this is potentially wasteful (because you have to repeat
162 |     the matrices n times if you want to get n samples), but makes it easy to implement
163 |     code where the parameters vary as they are conditioned on different datapoints.
164 |     '''
165 |     normals = random_normal(K.shape(mu), mu, sig)
166 |     k = random_multinomial(pi)
167 |     return K.sum(normals * k, axis=1, keepdims=True)
168 | 
169 | 
170 | ######### from DeepIV_True.models import Treatment, Response
171 | 
172 | class Treatment(Model):
173 |     '''
174 |     Adds sampling functionality to a Keras model and extends the losses to support
175 |     mixture of gaussian losses.
176 | 
177 |     # Arguments: identical to keras.models.Model; sampling is configured via compile().
178 |     '''
179 | 
180 |     def _get_sampler_by_string(self, loss):
181 |         output = self.outputs[0]
182 |         inputs = self.inputs
183 | 
184 |         if loss in ["MSE", "mse", "mean_squared_error"]:
185 |             output += random_normal(K.shape(output), mean=0.0, std=1.0)
186 |             draw_sample = K.function(inputs + [K.learning_phase()], [output])
187 | 
188 |             def sample_gaussian(inputs, use_dropout=False):
189 |                 '''
190 |                 Helper to draw samples from a gaussian distribution
191 |                 '''
192 |                 return draw_sample(inputs + [int(use_dropout)])[0]
193 | 
194 |             return sample_gaussian
195 | 
196 |         elif loss == "binary_crossentropy":
197 |             output = K.random_binomial(K.shape(output), p=output)
198 |             draw_sample = K.function(inputs + [K.learning_phase()], [output])
199 | 
200 |             def sample_binomial(inputs, use_dropout=False):
201 |                 '''
202 |                 Helper to draw samples from a binomial distribution
203 |                 '''
204 |                 return draw_sample(inputs + [int(use_dropout)])[0]
205 | 
206 |             return sample_binomial
207 | 
208 |         elif loss in ["mean_absolute_error", "mae", "MAE"]:
209 |             output += random_laplace(K.shape(output), mu=0.0, b=1.0)
210 |             draw_sample = K.function(inputs + [K.learning_phase()], [output])
211 |             def sample_laplace(inputs, use_dropout=False):
212 |                 '''
213 |                 Helper to draw samples from a Laplacian distribution
214 |                 '''
215 |                 return draw_sample(inputs + [int(use_dropout)])[0]
216 | 
217 |             return sample_laplace
218 | 
219 |         elif loss == "mixture_of_gaussians":
220 |             pi, mu, log_sig = split_mixture_of_gaussians(output, self.n_components)
221 |             samples = random_gmm(pi, mu, K.exp(log_sig))
222 |             draw_sample = K.function(inputs + [K.learning_phase()], [samples])
223 |             return lambda inputs, use_dropout: draw_sample(inputs + [int(use_dropout)])[0]
224 | 
225 |         else:
226 |             raise NotImplementedError("Unrecognised loss: %s. Cannot build a generic sampler" % loss)
227 | 
228 |     def _prepare_sampler(self, loss):
229 |         '''
230 |         Build sampler
231 |         '''
232 |         if isinstance(loss, str):
233 |             self._sampler = self._get_sampler_by_string(loss)
234 |         else:
235 |             warnings.warn("You're using a custom loss function. Make sure you implement\
236 |                            the model's sample() function yourself.")
237 | 
238 |     def compile(self, optimizer, loss, metrics=None, loss_weights=None,
239 |                 sample_weight_mode=None, n_components=None, **kwargs):
240 |         '''
241 |         Overrides the existing keras compile function to add a sampler-building
242 |         step to the model compilation phase. Once compiled, one can draw samples
243 |         from the network using the sample() function; it also adds support for the
244 |         mixture of gaussians loss.
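        Example (sketch; mirrors the call made in DeepIV.fit below):

            treatment_model = Treatment(inputs=[instruments, features], outputs=est_treat)
            treatment_model.compile('adam', loss="mixture_of_gaussians", n_components=5)
            samples = treatment_model.sample([z, x], n_samples=10)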
245 | 
246 |         '''
247 |         if loss == "mixture_of_gaussians":
248 |             if n_components is None:
249 |                 raise Exception("When using mixture of gaussian loss you must\
250 |                                  supply n_components argument")
251 |             self.n_components = n_components
252 |             self._prepare_sampler(loss)
253 |             loss = lambda y_true, y_pred: mixture_of_gaussian_loss(y_true, y_pred, n_components)
254 |         else:
255 |             self._prepare_sampler(loss)
256 | 
257 |         super(Treatment, self).compile(optimizer, loss, metrics=metrics, loss_weights=loss_weights,
258 |                                        sample_weight_mode=sample_weight_mode, **kwargs)
259 | 
260 |     def sample(self, inputs, n_samples=1, use_dropout=False):
261 |         '''
262 |         Draw samples from the keras model.
263 |         '''
264 |         if hasattr(self, "_sampler"):
265 |             if not isinstance(inputs, list):
266 |                 inputs = [inputs]
267 |             inputs = [i.repeat(n_samples, axis=0) for i in inputs]
268 |             return self._sampler(inputs, use_dropout)
269 |         else:
270 |             raise Exception("Compile model with loss before sampling")
271 | 
272 | class Response(Model):
273 |     '''
274 |     Extends the Keras Model class to support sampling from the Treatment
275 |     model during training.
276 | 
277 |     Overwrites the existing fit_generator function.
278 | 
279 |     # Arguments
280 |     In addition to the standard model arguments, a Response object takes
281 |     a Treatment object as input so that it can sample from the fitted treatment
282 |     distribution during training.
283 |     '''
284 |     def __init__(self, treatment, **kwargs):
285 |         if isinstance(treatment, Treatment):
286 |             self.treatment = treatment
287 |         else:
288 |             raise TypeError("Expected a treatment model of type Treatment. \
289 |                              Got a model of type %s. Remember to train your\
290 |                              treatment model first." % type(treatment))
291 |         super(Response, self).__init__(**kwargs)
292 | 
293 |     def compile(self, optimizer, loss, metrics=None, loss_weights=None, sample_weight_mode=None,
294 |                 unbiased_gradient=False, n_samples=1, batch_size=None):
295 |         super(Response, self).compile(optimizer=optimizer, loss=loss, loss_weights=loss_weights,
296 |                                       sample_weight_mode=sample_weight_mode)
297 |         self.unbiased_gradient = unbiased_gradient
298 |         if unbiased_gradient:
299 |             if loss in ["MSE", "mse", "mean_squared_error"]:
300 |                 if batch_size is None:
301 |                     raise ValueError("Must supply a batch_size argument if using unbiased gradients. Currently batch_size is None.")
302 |                 replace_gradients_mse(self, optimizer, batch_size=batch_size, n_samples=n_samples)
303 |             else:
304 |                 warnings.warn("Unbiased gradient only implemented for mean square error loss. It is unnecessary for\
305 |                                logistic losses and currently not implemented for absolute error losses.")
306 | 
307 | 
308 |     def fit(self, x=None, y=None, batch_size=512, epochs=1, verbose=1, callbacks=None,
309 |             validation_data=None, class_weight=None, initial_epoch=0, samples_per_batch=None,
310 |             seed=None, observed_treatments=None):
311 |         '''
312 |         Trains the model by sampling from the fitted treatment distribution.
313 | 
314 |         # Arguments
315 |         x: list of numpy arrays. The first element should *always* be the instrument variables.
316 |         y: (numpy array). Target response variables.
317 |         The remainder of the arguments correspond to the Keras definitions.
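        Example (sketch; `z`, `x`, `y` are numpy arrays, as in DeepIV.fit below):

            response_model.compile('adam', loss='mse')
            response_model.fit([z, x], y, epochs=10, batch_size=100, samples_per_batch=2)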
318 |         '''
319 |         batch_size = np.minimum(y.shape[0], batch_size)
320 |         if seed is None:
321 |             seed = np.random.randint(0, 1e6)
322 |         if samples_per_batch is None:
323 |             if self.unbiased_gradient:
324 |                 samples_per_batch = 2
325 |             else:
326 |                 samples_per_batch = 1
327 | 
328 |         if observed_treatments is None:
329 |             generator = SampledSequence(x[1:], x[0], y, batch_size, self.treatment.sample, samples_per_batch)
330 |         else:
331 |             generator = OnesidedUnbaised(x[1:], x[0], y, observed_treatments, batch_size,
332 |                                          self.treatment.sample, samples_per_batch)
333 | 
334 |         steps_per_epoch = y.shape[0] // batch_size
335 |         super(Response, self).fit_generator(generator=generator,
336 |                                             steps_per_epoch=steps_per_epoch,
337 |                                             epochs=epochs, verbose=verbose,
338 |                                             callbacks=callbacks, validation_data=validation_data,
339 |                                             class_weight=class_weight, initial_epoch=initial_epoch)
340 | 
341 |     def fit_generator(self, **kwargs):
342 |         '''
343 |         We override fit_generator to support sampling from the treatment model during training.
344 | 
345 |         If you need this functionality, you'll need to build a generator that samples from the
346 |         treatment and performs whatever transformations you're performing. Please submit a pull
347 |         request if you implement this.
348 |         '''
349 |         raise NotImplementedError("We override fit_generator to support sampling from the\
350 |                                    treatment model during training.")
351 | 
352 |     def expected_representation(self, x, z, n_samples=100, batch_size=None, seed=None):
353 |         inputs = [z, x]
354 |         if not hasattr(self, "_E_representation"):
355 |             if batch_size is None:
356 |                 batch_size = inputs[0].shape[0]
357 |                 steps = 1
358 |             else:
359 |                 steps = inputs[0].shape[0] // batch_size
360 | 
361 |             intermediate_layer_model = Model(inputs=self.inputs,
362 |                                              outputs=self.layers[-2].output)
363 | 
364 |             def pred(inputs, n_samples=100, seed=None):
365 |                 features = inputs[1]
366 | 
367 |                 samples = self.treatment.sample(inputs, n_samples)
368 |                 batch_features = [features.repeat(n_samples, axis=0)] + [samples]
369 |                 representation = intermediate_layer_model.predict(batch_features)
370 |                 return representation.reshape((inputs[0].shape[0], n_samples, -1)).mean(axis=1)
371 |             self._E_representation = pred
372 |             return self._E_representation(inputs, n_samples, seed)
373 |         else:
374 |             return self._E_representation(inputs, n_samples, seed)
375 | 
376 |     def conditional_representation(self, x, p):
377 |         inputs = [x, p]
378 |         if not hasattr(self, "_c_representation"):
379 |             intermediate_layer_model = Model(inputs=self.inputs,
380 |                                              outputs=self.layers[-2].output)
381 | 
382 |             self._c_representation = intermediate_layer_model.predict
383 |             return self._c_representation(inputs)
384 |         else:
385 |             return self._c_representation(inputs)
386 | 
387 |     def dropout_predict(self, x, z, n_samples=100):
388 |         if isinstance(x, list):
389 |             inputs = [z] + x
390 |         else:
391 |             inputs = [z, x]
392 |         if not hasattr(self, "_dropout_predict"):
393 | 
394 |             predict_with_dropout = K.function(self.inputs + [K.learning_phase()],
395 |                                               [self.layers[-1].output])
396 | 
397 |             def pred(inputs, n_samples = 100):
398 |                 # draw samples from the treatment network with dropout turned on
399 |                 samples = self.treatment.sample(inputs, n_samples, use_dropout=True)
400 |                 # prepare inputs for the response network
401 |                 rep_inputs = [i.repeat(n_samples, axis=0) for i in inputs[1:]] + [samples]
402 |                 # return outputs from the response network with dropout turned on (learning_phase=1)
403 |                 return predict_with_dropout(rep_inputs + [1])[0]
404 | 
self._dropout_predict = pred 405 | return self._dropout_predict(inputs, n_samples) 406 | else: 407 | return self._dropout_predict(inputs, n_samples) 408 | 409 | def credible_interval(self, x, z, n_samples=100, p=0.95): 410 | ''' 411 | Return a credible interval of size p using dropout variational inference. 412 | ''' 413 | if isinstance(x, list): 414 | n = x[0].shape[0] 415 | else: 416 | n = x.shape[0] 417 | alpha = (1-p) / 2. 418 | samples = self.dropout_predict(x, z, n_samples).reshape((n, n_samples, -1)) 419 | upper = np.percentile(samples.copy(), 100*(p+alpha), axis=1) 420 | lower = np.percentile(samples.copy(), 100*(alpha), axis=1) 421 | return lower, upper 422 | 423 | def _add_constant(self, X): 424 | return np.concatenate((np.ones((X.shape[0], 1)), X), axis=1) 425 | 426 | def predict_confidence(self, x, p): 427 | if hasattr(self, "_predict_confidence"): 428 | return self._predict_confidence(x, p) 429 | else: 430 | raise Exception("Call fit_confidence_interval before running predict_confidence") 431 | 432 | 433 | def fit_confidence_interval(self, x_lo, z_lo, p_lo, y_lo, n_samples=100, alpha=0.): 434 | eta_bar = self.expected_representation(x=x_lo, z=z_lo, n_samples=n_samples) 435 | pca = PCA(1-1e-16, svd_solver="full", whiten=True) 436 | pca.fit(eta_bar) 437 | 438 | eta_bar = pca.transform(eta_bar) 439 | eta_lo_prime = pca.transform(self.conditional_representation(x_lo, p_lo)) 440 | eta_lo = self._add_constant(eta_lo_prime) 441 | 442 | ols1 = linear_model.Ridge(alpha=alpha, fit_intercept=True) 443 | ols1.fit(eta_bar, eta_lo_prime) 444 | hhat = ols1.predict(eta_bar) 445 | ols2 = linear_model.Ridge(alpha=alpha, fit_intercept=False) 446 | ols2.fit(self._add_constant(hhat), y_lo) 447 | 448 | yhat = ols2.predict(eta_lo) 449 | hhi = np.linalg.inv(np.dot(eta_lo.T, eta_lo)) 450 | 451 | heh = np.dot(eta_lo.T, np.square(y_lo - yhat) * eta_lo) 452 | V = np.dot(np.dot(hhi, heh), hhi) 453 | 454 | def pred(xx, pp): 455 | H = self._add_constant(pca.transform(self.conditional_representation(xx,pp))) 456 | sdhb = np.sqrt(np.diag(np.dot(np.dot(H, V), H.T))) 457 | hb = ols2.predict(H).flatten() 458 | return hb, sdhb 459 | 460 | self._predict_confidence = pred 461 | 462 | class SampledSequence(keras.utils.Sequence): 463 | def __init__(self, features, instruments, outputs, batch_size, sampler, n_samples=1, seed=None): 464 | self.rng = np.random.RandomState(seed) 465 | if not isinstance(features, list): 466 | features = [features.copy()] 467 | else: 468 | features = [f.copy() for f in features] 469 | self.features = features 470 | self.instruments = instruments.copy() 471 | self.outputs = outputs.copy() 472 | if batch_size < self.instruments.shape[0]: 473 | self.batch_size = batch_size 474 | else: 475 | self.batch_size = self.instruments.shape[0] 476 | self.sampler = sampler 477 | self.n_samples = n_samples 478 | self.current_index = 0 479 | self.shuffle() 480 | 481 | def __len__(self): 482 | if isinstance(self.outputs, list): 483 | return self.outputs[0].shape[0] // self.batch_size 484 | else: 485 | return self.outputs.shape[0] // self.batch_size 486 | 487 | def shuffle(self): 488 | idx = self.rng.permutation(np.arange(self.instruments.shape[0])) 489 | self.instruments = self.instruments[idx,:] 490 | self.outputs = self.outputs[idx,:] 491 | self.features = [f[idx,:] for f in self.features] 492 | 493 | def __getitem__(self,idx): 494 | instruments = [self.instruments[idx*self.batch_size:(idx+1)*self.batch_size, :]] 495 | features = [inp[idx*self.batch_size:(idx+1)*self.batch_size, :] for inp in 
self.features] 496 | sampler_input = instruments + features 497 | samples = self.sampler(sampler_input, self.n_samples) 498 | batch_features = [f[idx*self.batch_size:(idx+1)*self.batch_size].repeat(self.n_samples, axis=0) for f in self.features] + [samples] 499 | batch_y = self.outputs[idx*self.batch_size:(idx+1)*self.batch_size].repeat(self.n_samples, axis=0) 500 | if idx == (len(self) - 1): 501 | self.shuffle() 502 | return batch_features, batch_y 503 | 504 | class OnesidedUnbaised(SampledSequence): 505 | def __init__(self, features, instruments, outputs, treatments, batch_size, sampler, n_samples=1, seed=None): 506 | self.rng = np.random.RandomState(seed) 507 | if not isinstance(features, list): 508 | features = [features.copy()] 509 | else: 510 | features = [f.copy() for f in features] 511 | self.features = features 512 | self.instruments = instruments.copy() 513 | self.outputs = outputs.copy() 514 | self.treatments = treatments.copy() 515 | self.batch_size = batch_size 516 | self.sampler = sampler 517 | self.n_samples = n_samples 518 | self.current_index = 0 519 | self.shuffle() 520 | 521 | def shuffle(self): 522 | idx = self.rng.permutation(np.arange(self.instruments.shape[0])) 523 | self.instruments = self.instruments[idx,:] 524 | self.outputs = self.outputs[idx,:] 525 | self.features = [f[idx,:] for f in self.features] 526 | self.treatments = self.treatments[idx,:] 527 | 528 | def __getitem__(self, idx): 529 | instruments = [self.instruments[idx*self.batch_size:(idx+1)*self.batch_size, :]] 530 | features = [inp[idx*self.batch_size:(idx+1)*self.batch_size, :] for inp in self.features] 531 | observed_treatments = self.treatments[idx*self.batch_size:(idx+1)*self.batch_size, :] 532 | sampler_input = instruments + features 533 | samples = self.sampler(sampler_input, self.n_samples // 2) 534 | samples = np.concatenate([observed_treatments, samples], axis=0) 535 | batch_features = [f[idx*self.batch_size:(idx+1)*self.batch_size].repeat(self.n_samples, axis=0) for f in self.features] + [samples] 536 | batch_y = self.outputs[idx*self.batch_size:(idx+1)*self.batch_size].repeat(self.n_samples, axis=0) 537 | if idx == (len(self) - 1): 538 | self.shuffle() 539 | return batch_features, batch_y 540 | 541 | ######### import DeepIV_True.architectures as architectures 542 | #new# 543 | def binary_crossentropy_output(x): 544 | pi = keras.layers.Dense(1, activation='softmax')(x) 545 | return pi 546 | 547 | def mixture_of_gaussian_output(x, n_components): 548 | mu = keras.layers.Dense(n_components, activation='linear')(x) 549 | log_sig = keras.layers.Dense(n_components, activation='linear')(x) 550 | pi = keras.layers.Dense(n_components, activation='softmax')(x) 551 | return Concatenate(axis=1)([pi, mu, log_sig]) 552 | 553 | def feed_forward_net(input, output, hidden_layers=[64, 64], activations='relu', 554 | dropout_rate=0., l2=0., constrain_norm=False): 555 | ''' 556 | Helper function for building a Keras feed forward network. 557 | 558 | input: Keras Input object appropriate for the data. e.g. input=Input(shape=(20,)) 559 | output: Function representing final layer for the network that maps from the last 560 | hidden layer to output. 561 | e.g. if output = Dense(10, activation='softmax') if we're doing 10 class 562 | classification or output = Dense(1, activation='linear') if we're doing 563 | regression. 
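    Example (sketch, mirroring the response network built in DeepIV.fit below):

        features = Input(shape=(3,))
        net = feed_forward_net(features, Dense(1),
                               hidden_layers=[128, 64, 32],
                               activations='relu', dropout_rate=0.5)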
564 | ''' 565 | state = input 566 | if isinstance(activations, str): 567 | activations = [activations] * len(hidden_layers) 568 | 569 | for h, a in zip(hidden_layers, activations): 570 | if l2 > 0.: 571 | w_reg = keras.regularizers.l2(l2) 572 | else: 573 | w_reg = None 574 | const = maxnorm(2) if constrain_norm else None 575 | state = Dense(h, activation=a, kernel_regularizer=w_reg, kernel_constraint=const)(state) 576 | if dropout_rate > 0.: 577 | state = Dropout(dropout_rate)(state) 578 | return output(state) 579 | 580 | class DeepIV(object): 581 | def __init__(self) -> None: 582 | self.config = { 583 | 'methodName': 'DeepIV', 584 | 'dropout': 0.5, 585 | 'epochs': 10, 586 | 'batch_size': 100, 587 | 'n_components': 5, 588 | 'layers': [128, 64, 32], 589 | 'activation': 'relu', 590 | 'samples_per_batch': 2, 591 | 't_loss': 'mixture_of_gaussians', 592 | 'y_loss': 'mse', 593 | 'seed': 2022, 594 | } 595 | 596 | def set_Configuration(self, config): 597 | self.config = config 598 | 599 | def fit(self, data, exp=-1, config=None): 600 | if config is None: 601 | config = self.config 602 | 603 | set_seed(config['seed']) 604 | clear_session() 605 | tf.reset_default_graph() 606 | random.seed(config['seed']) 607 | tf.compat.v1.set_random_seed(config['seed']) 608 | np.random.seed(config['seed']) 609 | data.numpy() 610 | 611 | config['num'] = data.train.length 612 | 613 | tfconfig = tf.ConfigProto() 614 | tfconfig.gpu_options.allow_growth=True 615 | sess = tf.Session(config=tfconfig) 616 | K.set_session(sess) 617 | 618 | dropout_rate = min(1000./(1000. + config['num']), config['dropout']) 619 | epochs = min(int(1000000./float(config['num'])), config['epochs']) 620 | 621 | instruments = Input(shape=(data.train.z.shape[1],), name="instruments") 622 | features = Input(shape=(data.train.x.shape[1],), name="features") 623 | treatment_input = Concatenate(axis=1)([instruments, features]) 624 | 625 | est_treat = feed_forward_net(treatment_input, lambda x: mixture_of_gaussian_output(x, config['n_components']), 626 | hidden_layers=config['layers'], 627 | dropout_rate=dropout_rate, l2=0.0001, 628 | activations=config['activation']) 629 | 630 | 631 | treatment_model = Treatment(inputs=[instruments, features], outputs=est_treat) 632 | treatment_model.compile('adam', loss=config['t_loss'], n_components=config['n_components']) 633 | 634 | treatment_model.fit([data.train.z, data.train.x], data.train.t, epochs=epochs, batch_size=config['batch_size']) 635 | 636 | treatment = Input(shape=(data.train.t.shape[1],), name="treatment") 637 | response_input = Concatenate(axis=1)([features, treatment]) 638 | 639 | est_response = feed_forward_net(response_input, Dense(1), 640 | activations=config['activation'], 641 | hidden_layers=config['layers'], 642 | l2=0.001, 643 | dropout_rate=dropout_rate) 644 | 645 | response_model = Response(treatment=treatment_model, 646 | inputs=[features, treatment], 647 | outputs=est_response) 648 | response_model.compile('adam', loss=config['y_loss']) 649 | 650 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 651 | 652 | response_model.fit([data.train.z, data.train.x], data.train.y, epochs=epochs, verbose=1, 653 | batch_size=config['batch_size'], samples_per_batch=config['samples_per_batch']) 654 | 655 | def estimation(data): 656 | return response_model.predict([data.x, data.t-data.t]), response_model.predict([data.x, data.t]) 657 | 658 | print('End. 
' + '-'*20) 659 | 660 | self.estimation = estimation 661 | self.response_model = response_model 662 | 663 | def predict(self, data=None, t=None, x=None): 664 | if data is None: 665 | data = self.data.test 666 | 667 | if x is None: 668 | x = data.x 669 | 670 | if t is None: 671 | t = data.t 672 | 673 | return self.response_model.predict([x, t]) 674 | 675 | def ITE(self, data=None, t=None, x=None): 676 | if data is None: 677 | data = self.data.test 678 | 679 | if x is None: 680 | x = data.x 681 | 682 | if t is None: 683 | t = data.t 684 | 685 | ITE_0 = self.response_model.predict([x, t-t]) 686 | ITE_1 = self.response_model.predict([x, t-t+1]) 687 | ITE_t = self.response_model.predict([x, t]) 688 | 689 | return ITE_0,ITE_1,ITE_t 690 | 691 | def ATE(self, data=None, t=None, x=None): 692 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 693 | 694 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 695 | 696 | -------------------------------------------------------------------------------- /mliv/inference/deep/dfiv_v1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import numpy as np 4 | from mliv.utils import set_seed, cat, split 5 | 6 | example = ''' 7 | from mliv.inference import DFIV 8 | 9 | model = DFIV() 10 | model.fit(data) 11 | ITE = model.predict(data.train) 12 | ATE,_ = model.ATE(data.train) 13 | ''' 14 | 15 | ############# Define Networks ################ 16 | def build_net(t_input_dim, z_input_dim, x_input_dim): 17 | treatment_net = nn.Sequential(nn.Linear(t_input_dim, 16), 18 | nn.ReLU(), 19 | nn.Linear(16, 1)) 20 | 21 | instrumental_net = nn.Sequential(nn.Linear(z_input_dim+x_input_dim, 128), 22 | nn.ReLU(), 23 | nn.Linear(128, 64), 24 | nn.ReLU(), 25 | nn.Linear(64, 32), 26 | nn.BatchNorm1d(32)) 27 | 28 | covariate_net = nn.Sequential(nn.Linear(x_input_dim, 128), 29 | nn.ReLU(), 30 | nn.Linear(128, 32), 31 | nn.BatchNorm1d(32), 32 | nn.ReLU(), 33 | nn.Linear(32, 16), 34 | nn.ReLU()) 35 | 36 | return treatment_net, instrumental_net, covariate_net 37 | 38 | ################## Define loss ################################# 39 | def fit_linear(target: torch.Tensor, feature: torch.Tensor, reg: float = 0.0): 40 | nData, nDim = feature.size() 41 | A = torch.matmul(feature.t(), feature) 42 | device = feature.device 43 | A = A + reg * torch.eye(nDim, device=device) 44 | A_inv = torch.inverse(A) 45 | if target.dim() == 2: 46 | b = torch.matmul(feature.t(), target) 47 | weight = torch.matmul(A_inv, b) 48 | else: 49 | b = torch.einsum("nd,n...->d...", feature, target) 50 | weight = torch.einsum("de,d...->e...", A_inv, b) 51 | return weight 52 | 53 | def linear_reg_pred(feature: torch.Tensor, weight: torch.Tensor): 54 | if weight.dim() == 2: 55 | return torch.matmul(feature, weight) 56 | else: 57 | return torch.einsum("nd,d...->n...", feature, weight) 58 | 59 | def linear_reg_loss(target: torch.Tensor, feature: torch.Tensor, reg: float): 60 | weight = fit_linear(target, feature, reg) 61 | pred = linear_reg_pred(feature, weight) 62 | return torch.norm((target - pred)) ** 2 + reg * torch.norm(weight) ** 2 63 | 64 | ############ Define Utils ##################### 65 | def add_const_col(mat: torch.Tensor): 66 | n_data = mat.size()[0] 67 | device = mat.device 68 | return torch.cat([mat, torch.ones((n_data, 1), device=device)], dim=1) 69 | 70 | def augment_z_feature(feature, add_intercept): 71 | if add_intercept: feature = add_const_col(feature) 72 | return feature 73 | 74 | def augment_tx_feature(feature, feature_tmp, 
add_intercept): 75 | if add_intercept: feature = add_const_col(feature) 76 | if add_intercept: feature_tmp = add_const_col(feature_tmp) 77 | feature = outer_prod(feature, feature_tmp) 78 | feature = torch.flatten(feature, start_dim=1) 79 | return feature 80 | 81 | def outer_prod(mat1: torch.Tensor, mat2: torch.Tensor): 82 | mat1_shape = tuple(mat1.size()) 83 | mat2_shape = tuple(mat2.size()) 84 | assert mat1_shape[0] == mat2_shape[0] 85 | nData = mat1_shape[0] 86 | aug_mat1_shape = mat1_shape + (1,) * (len(mat2_shape) - 1) 87 | aug_mat1 = torch.reshape(mat1, aug_mat1_shape) 88 | aug_mat2_shape = (nData,) + (1,) * (len(mat1_shape) - 1) + mat2_shape[1:] 89 | aug_mat2 = torch.reshape(mat2, aug_mat2_shape) 90 | return aug_mat1 * aug_mat2 91 | 92 | class DFIVTrainer(object): 93 | 94 | def __init__(self, data, train_dict): 95 | 96 | device = train_dict['device'] 97 | device = device if train_dict['GPU'] and torch.cuda.is_available() else "cpu" 98 | self.device = device 99 | 100 | data.tensor() 101 | data.to(device) 102 | data.split(train_dict["split_ratio"]) 103 | self.data = data 104 | 105 | self.t_loss = train_dict['t_loss'] 106 | self.y_loss = train_dict['y_loss'] 107 | self.gpu_flg = train_dict['GPU'] and torch.cuda.is_available() 108 | self.add_intercept = train_dict['intercept'] 109 | self.n_epoch = train_dict["epochs"] 110 | self.lam1 = train_dict["lam1"] 111 | self.lam2 = train_dict["lam2"] 112 | self.stage1_iter = train_dict["stage1_iter"] 113 | self.stage2_iter = train_dict["stage2_iter"] 114 | self.covariate_iter = train_dict["covariate_iter"] 115 | self.split_ratio = train_dict["split_ratio"] 116 | self.treatment_weight_decay = train_dict["treatment_weight_decay"] 117 | self.instrumental_weight_decay = train_dict["instrumental_weight_decay"] 118 | self.covariate_weight_decay = train_dict["covariate_weight_decay"] 119 | self.verbose = train_dict["verbose"] 120 | self.show_per_epoch = train_dict["show_per_epoch"] 121 | 122 | self.treatment_net, self.instrumental_net, self.covariate_net = build_net(train_dict['t_dim'], train_dict['z_dim'], train_dict['x_dim']) 123 | if self.gpu_flg: 124 | self.treatment_net.to(device) 125 | self.instrumental_net.to(device) 126 | self.covariate_net.to(device) 127 | self.treatment_opt = torch.optim.Adam(self.treatment_net.parameters(),weight_decay=self.treatment_weight_decay) 128 | self.instrumental_opt = torch.optim.Adam(self.instrumental_net.parameters(),weight_decay=self.instrumental_weight_decay) 129 | self.covariate_opt = torch.optim.Adam(self.covariate_net.parameters(),weight_decay=self.covariate_weight_decay) 130 | 131 | def train(self, verbose=None, show_per_epoch=None): 132 | if verbose is None or show_per_epoch is None: 133 | verbose, show_per_epoch = self.verbose, self.show_per_epoch 134 | 135 | self.lam1 *= self.data.data1.length 136 | self.lam2 *= self.data.data2.length 137 | 138 | for exp in range(self.n_epoch): 139 | self.stage1_update(self.data.data1, verbose) 140 | self.covariate_update(self.data.data1, self.data.data2, verbose) 141 | self.stage2_update(self.data.data1, self.data.data2, verbose) 142 | if exp % show_per_epoch == 0 or exp == self.n_epoch - 1: 143 | if verbose >= 1: 144 | pred_0x2y, pred_tx2y = self.estimation4tx(self.data.valid) 145 | mse_y = ((pred_tx2y - self.data.valid.y) ** 2).mean() 146 | mse_g = ((pred_tx2y - self.data.valid.g) ** 2).mean() 147 | print(f"Epoch {exp} ended: {mse_y}, {mse_g}. 
") 148 | 149 | def stage1_update(self, train_1st, verbose): 150 | self.instrumental_net.train(True) 151 | self.treatment_net.train(False) 152 | self.covariate_net.train(False) 153 | 154 | treatment_feature = self.treatment_net(train_1st.t).detach() 155 | for i in range(self.stage1_iter): 156 | self.instrumental_opt.zero_grad() 157 | instrumental_feature = self.instrumental_net(cat([train_1st.z,train_1st.x])) 158 | feature = augment_z_feature(instrumental_feature, self.add_intercept) 159 | loss = linear_reg_loss(treatment_feature, feature, self.lam1) 160 | loss.backward() 161 | if verbose >= 2: print(f"stage1 learning: {loss.item()}") 162 | self.instrumental_opt.step() 163 | 164 | def covariate_update(self, train_1st, train_2nd, verbose): 165 | self.instrumental_net.train(False) 166 | self.treatment_net.train(False) 167 | self.covariate_net.train(True) 168 | 169 | instrumental_1st_feature = self.instrumental_net(cat([train_1st.z,train_1st.x])).detach() 170 | instrumental_2nd_feature = self.instrumental_net(cat([train_2nd.z,train_2nd.x])).detach() 171 | treatment_1st_feature = self.treatment_net(train_1st.t).detach() 172 | 173 | feature_1st = augment_z_feature(instrumental_1st_feature, self.add_intercept) 174 | feature_2nd = augment_z_feature(instrumental_2nd_feature, self.add_intercept) 175 | self.stage1_weight = fit_linear(treatment_1st_feature, feature_1st, self.lam1) 176 | predicted_treatment_feature_2nd = linear_reg_pred(feature_2nd, self.stage1_weight).detach() 177 | 178 | for i in range(self.covariate_iter): 179 | self.covariate_opt.zero_grad() 180 | covariate_feature = self.covariate_net(train_2nd.x) 181 | feature = augment_tx_feature(predicted_treatment_feature_2nd, covariate_feature, self.add_intercept) 182 | loss = linear_reg_loss(train_2nd.y, feature, self.lam2) 183 | loss.backward() 184 | if verbose >= 2: print(f"update covariate: {loss.item()}") 185 | self.covariate_opt.step() 186 | 187 | def stage2_update(self, train_1st, train_2nd, verbose): 188 | self.instrumental_net.train(False) 189 | self.treatment_net.train(True) 190 | self.covariate_net.train(False) 191 | 192 | instrumental_1st_feature = self.instrumental_net(cat([train_1st.z,train_1st.x])).detach() 193 | instrumental_2nd_feature = self.instrumental_net(cat([train_2nd.z,train_2nd.x])).detach() 194 | covariate_2nd_feature = self.covariate_net(train_2nd.x).detach() 195 | 196 | for i in range(self.stage2_iter): 197 | self.treatment_opt.zero_grad() 198 | treatment_1st_feature = self.treatment_net(train_1st.t) 199 | 200 | feature_1st = augment_z_feature(instrumental_1st_feature, self.add_intercept) 201 | feature_2nd = augment_z_feature(instrumental_2nd_feature, self.add_intercept) 202 | self.stage1_weight = fit_linear(treatment_1st_feature, feature_1st, self.lam1) 203 | predicted_treatment_feature = linear_reg_pred(feature_2nd, self.stage1_weight) 204 | 205 | feature = augment_tx_feature(predicted_treatment_feature, covariate_2nd_feature, self.add_intercept) 206 | self.stage2_weight = fit_linear(train_2nd.y, feature, self.lam2) 207 | pred = linear_reg_pred(feature, self.stage2_weight) 208 | loss = torch.norm((train_2nd.y - pred)) ** 2 + self.lam2 * torch.norm(self.stage2_weight) ** 2 209 | 210 | loss.backward() 211 | if verbose >= 2: print(f"stage2 learning: {loss.item()}") 212 | self.treatment_opt.step() 213 | 214 | def estimation4tx(self, data, update_weight1=False, update_weight2=False): 215 | self.instrumental_net.train(False) 216 | self.treatment_net.train(False) 217 | self.covariate_net.train(False) 218 | 219 | 
instrumental_feature = self.instrumental_net(cat([data.z,data.x])).detach() 220 | treatment_feature = self.treatment_net(data.t).detach() 221 | treatment_feature_0 = self.treatment_net(data.t-data.t).detach() 222 | covariate_feature = self.covariate_net(data.x).detach() 223 | 224 | feature_stage1 = augment_z_feature(instrumental_feature, self.add_intercept) 225 | if update_weight1: self.stage1_weight = fit_linear(treatment_feature, feature_stage1, self.lam1) 226 | predicted_treatment_feature = linear_reg_pred(feature_stage1, self.stage1_weight) 227 | 228 | feature_stage2_tx2y = augment_tx_feature(treatment_feature, covariate_feature, self.add_intercept) 229 | if update_weight2: self.stage2_weight = fit_linear(data.y, feature_stage2_tx2y, self.lam2) 230 | pred_tx2y = linear_reg_pred(feature_stage2_tx2y, self.stage2_weight) 231 | 232 | feature_stage2_0x2y = augment_tx_feature(treatment_feature_0, covariate_feature, self.add_intercept) 233 | if update_weight2: self.stage2_weight = fit_linear(data.y, feature_stage2_0x2y, self.lam2) 234 | pred_0x2y = linear_reg_pred(feature_stage2_0x2y, self.stage2_weight) 235 | 236 | return pred_0x2y, pred_tx2y 237 | 238 | def estimation4zx(self, data, update_weight1=False, update_weight2=False): 239 | self.instrumental_net.train(False) 240 | self.treatment_net.train(False) 241 | self.covariate_net.train(False) 242 | 243 | instrumental_feature = self.instrumental_net(cat([data.z,data.x])).detach() 244 | treatment_feature = self.treatment_net(data.t).detach() 245 | covariate_feature = self.covariate_net(data.x).detach() 246 | 247 | feature_stage1 = augment_z_feature(instrumental_feature, self.add_intercept) 248 | if update_weight1: self.stage1_weight = fit_linear(treatment_feature, feature_stage1, self.lam1) 249 | predicted_treatment_feature = linear_reg_pred(feature_stage1, self.stage1_weight) 250 | 251 | feature_stage2_zx2y = augment_tx_feature(predicted_treatment_feature, covariate_feature, self.add_intercept) 252 | if update_weight2: self.stage2_weight = fit_linear(data.y, feature_stage2_zx2y, self.lam2) 253 | pred_zx2y = linear_reg_pred(feature_stage2_zx2y, self.stage2_weight) 254 | 255 | return pred_zx2y 256 | 257 | class DFIV(object): 258 | def __init__(self) -> None: 259 | self.config = { 260 | 'methodName': 'DFIV', 261 | 't_loss': 'mse', 262 | 'y_loss': 'mse', 263 | 'device': 'cuda:0', 264 | 'GPU': True, 265 | 'intercept': True, 266 | "epochs": 100, 267 | 'lam1': 0.1, 268 | 'lam2': 0.1, 269 | 'stage1_iter': 20, 270 | 'stage2_iter': 1, 271 | 'covariate_iter': 20, 272 | 'split_ratio': 0.5, 273 | 'treatment_weight_decay': 0.0, 274 | 'instrumental_weight_decay': 0.0, 275 | 'covariate_weight_decay': 0.1, 276 | 'verbose': 1, 277 | 'show_per_epoch': 20, 278 | 'seed': 2022, 279 | } 280 | 281 | def set_Configuration(self, config): 282 | self.config = config 283 | 284 | def fit(self, data, exp=-1, config=None): 285 | if config is None: 286 | config = self.config 287 | 288 | set_seed(config['seed']) 289 | data.numpy() 290 | 291 | self.z_dim = data.train.z.shape[1] 292 | self.x_dim = data.train.x.shape[1] 293 | self.t_dim = data.train.t.shape[1] 294 | 295 | config['z_dim'] = self.z_dim 296 | config['x_dim'] = self.x_dim 297 | config['t_dim'] = self.t_dim 298 | 299 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 300 | 301 | trainer = DFIVTrainer(data, config) 302 | trainer.train() 303 | 304 | print('End. 
' + '-'*20) 305 | 306 | self.estimation = trainer.estimation4tx 307 | self.nets = trainer 308 | 309 | def predict(self, data=None, t=None, x=None): 310 | if data is None: 311 | data = self.data.test 312 | 313 | if x is None: 314 | x = data.x 315 | 316 | if t is None: 317 | t = data.t 318 | 319 | with torch.no_grad(): 320 | treatment_feature = self.nets.treatment_net(t).detach() 321 | covariate_feature = self.nets.covariate_net(x).detach() 322 | feature_stage2_tx2y = augment_tx_feature(treatment_feature, covariate_feature, self.nets.add_intercept) 323 | pred_tx2y = linear_reg_pred(feature_stage2_tx2y, self.nets.stage2_weight).detach().cpu().numpy() 324 | 325 | return pred_tx2y 326 | 327 | def ITE(self, data=None, t=None, x=None): 328 | if data is None: 329 | data = self.data.test 330 | 331 | if x is None: 332 | x = data.x 333 | 334 | if t is None: 335 | t = data.t 336 | 337 | with torch.no_grad(): 338 | feature_0 = self.nets.treatment_net(t-t).detach() 339 | feature_1 = self.nets.treatment_net(t-t+1).detach() 340 | feature_t = self.nets.treatment_net(t).detach() 341 | x_feature = self.nets.covariate_net(x).detach() 342 | 343 | feature_0x = augment_tx_feature(feature_0, x_feature, self.nets.add_intercept) 344 | feature_1x = augment_tx_feature(feature_1, x_feature, self.nets.add_intercept) 345 | feature_tx = augment_tx_feature(feature_t, x_feature, self.nets.add_intercept) 346 | 347 | ITE_0 = linear_reg_pred(feature_0x, self.nets.stage2_weight).detach().cpu().numpy() 348 | ITE_1 = linear_reg_pred(feature_1x, self.nets.stage2_weight).detach().cpu().numpy() 349 | ITE_t = linear_reg_pred(feature_tx, self.nets.stage2_weight).detach().cpu().numpy() 350 | 351 | return ITE_0,ITE_1,ITE_t 352 | 353 | def ATE(self, data=None, t=None, x=None): 354 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 355 | 356 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 357 | -------------------------------------------------------------------------------- /mliv/inference/dflearning/__init__.py: -------------------------------------------------------------------------------- 1 | from .dfl_v1 import DFL -------------------------------------------------------------------------------- /mliv/inference/dflearning/dfl_v1.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List, NamedTuple, TYPE_CHECKING, Optional 2 | import torch 3 | from torch import nn 4 | import numpy as np 5 | from pathlib import Path 6 | from mliv.utils import set_seed 7 | 8 | example = ''' 9 | from mliv.inference import DFL 10 | 11 | model = DFL() 12 | model.fit(data) 13 | ITE = model.predict(data.train) 14 | ATE,_ = model.ATE(data.train) 15 | ''' 16 | 17 | ############ from .data_class import TrainDataSet, TestDataSet, TrainDataSetTorch, TestDataSetTorch 18 | class TrainDataSet(NamedTuple): 19 | treatment: np.ndarray 20 | instrumental: np.ndarray 21 | covariate: Optional[np.ndarray] 22 | outcome: np.ndarray 23 | structural: np.ndarray 24 | 25 | class TestDataSet(NamedTuple): 26 | treatment: np.ndarray 27 | covariate: Optional[np.ndarray] 28 | structural: np.ndarray 29 | instrumental: Optional[np.ndarray] 30 | outcome: Optional[np.ndarray] 31 | 32 | class TrainDataSetTorch(NamedTuple): 33 | treatment: torch.Tensor 34 | instrumental: torch.Tensor 35 | covariate: torch.Tensor 36 | outcome: torch.Tensor 37 | structural: torch.Tensor 38 | 39 | @classmethod 40 | def from_numpy(cls, train_data: TrainDataSet): 41 | covariate = None 42 | if train_data.covariate is not None: 43 | covariate = 
torch.tensor(train_data.covariate, dtype=torch.float32) 44 | return TrainDataSetTorch(treatment=torch.tensor(train_data.treatment, dtype=torch.float32), 45 | instrumental=torch.tensor(train_data.instrumental, dtype=torch.float32), 46 | covariate=covariate, 47 | outcome=torch.tensor(train_data.outcome, dtype=torch.float32), 48 | structural=torch.tensor(train_data.structural, dtype=torch.float32)) 49 | 50 | def to(self, device): 51 | covariate = None 52 | if self.covariate is not None: 53 | covariate = self.covariate.to(device) 54 | return TrainDataSetTorch(treatment=self.treatment.to(device), 55 | instrumental=self.instrumental.to(device), 56 | covariate=covariate, 57 | outcome=self.outcome.to(device), 58 | structural=self.structural.to(device)) 59 | 60 | class TestDataSetTorch(NamedTuple): 61 | treatment: torch.Tensor 62 | instrumental: torch.Tensor 63 | covariate: torch.Tensor 64 | outcome: torch.Tensor 65 | structural: torch.Tensor 66 | 67 | @classmethod 68 | def from_numpy(cls, test_data: TestDataSet): 69 | covariate = None 70 | instrumental = None 71 | outcome = None 72 | if test_data.covariate is not None: 73 | covariate = torch.tensor(test_data.covariate, dtype=torch.float32) 74 | if test_data.instrumental is not None: 75 | instrumental = torch.tensor(test_data.instrumental, dtype=torch.float32) 76 | if test_data.outcome is not None: 77 | outcome = torch.tensor(test_data.outcome, dtype=torch.float32) 78 | return TestDataSetTorch(treatment=torch.tensor(test_data.treatment, dtype=torch.float32), 79 | covariate=covariate, 80 | instrumental=instrumental, 81 | outcome=outcome, 82 | structural=torch.tensor(test_data.structural, dtype=torch.float32)) 83 | def to(self, device): 84 | covariate = None 85 | instrumental = None 86 | outcome = None 87 | if self.covariate is not None: 88 | covariate = self.covariate.to(device) 89 | if self.instrumental is not None: 90 | instrumental = self.instrumental.to(device) 91 | if self.outcome is not None: 92 | outcome = self.outcome.to(device) 93 | return TestDataSetTorch(treatment=self.treatment.to(device), 94 | covariate=covariate, 95 | instrumental=instrumental, 96 | outcome=outcome, 97 | structural=self.structural.to(device)) 98 | 99 | ############ from .utils.pytorch_linear_reg_utils import linear_reg_loss, fit_linear, linear_reg_pred, outer_prod, add_const_col 100 | def inv_logit_np(x): 101 | return np.log(x / (1-x)) 102 | 103 | def logit_np(x): 104 | return 1 / (1 + np.exp(-x)) 105 | 106 | def inv_logit(x): 107 | return torch.log(x / (1-x)) 108 | 109 | def logit(x): 110 | return 1 / (1 + torch.exp(-x)) 111 | 112 | def linear_log_loss(target: torch.Tensor, 113 | feature: torch.Tensor, 114 | reg: float): 115 | weight = fit_linear(target, feature, reg) 116 | pred = linear_reg_pred(feature, weight) 117 | 118 | labels = logit(target) 119 | logits = logit(pred) 120 | return (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() + reg * torch.norm(weight) ** 2 121 | 122 | def linear_reg_loss(target: torch.Tensor, 123 | feature: torch.Tensor, 124 | reg: float): 125 | weight = fit_linear(target, feature, reg) 126 | pred = linear_reg_pred(feature, weight) 127 | return torch.norm((target - pred)) ** 2 + reg * torch.norm(weight) ** 2 128 | 129 | def fit_linear(target: torch.Tensor, 130 | feature: torch.Tensor, 131 | reg: float = 0.0): 132 | assert feature.dim() == 2 133 | assert target.dim() >= 2 134 | nData, nDim = feature.size() 135 | A = torch.matmul(feature.t(), feature) 136 | device = feature.device 137 | A = A + reg * torch.eye(nDim, 
device=device) 138 | # U = torch.cholesky(A) 139 | # A_inv = torch.cholesky_inverse(U) 140 | #TODO use cholesky version in the latest pytorch 141 | A_inv = torch.inverse(A) 142 | if target.dim() == 2: 143 | b = torch.matmul(feature.t(), target) 144 | weight = torch.matmul(A_inv, b) 145 | else: 146 | b = torch.einsum("nd,n...->d...", feature, target) 147 | weight = torch.einsum("de,d...->e...", A_inv, b) 148 | return weight 149 | 150 | def linear_reg_pred(feature: torch.Tensor, weight: torch.Tensor): 151 | assert weight.dim() >= 2 152 | if weight.dim() == 2: 153 | return torch.matmul(feature, weight) 154 | else: 155 | return torch.einsum("nd,d...->n...", feature, weight) 156 | 157 | def outer_prod(mat1: torch.Tensor, mat2: torch.Tensor): 158 | mat1_shape = tuple(mat1.size()) 159 | mat2_shape = tuple(mat2.size()) 160 | assert mat1_shape[0] == mat2_shape[0] 161 | nData = mat1_shape[0] 162 | aug_mat1_shape = mat1_shape + (1,) * (len(mat2_shape) - 1) 163 | aug_mat1 = torch.reshape(mat1, aug_mat1_shape) 164 | aug_mat2_shape = (nData,) + (1,) * (len(mat1_shape) - 1) + mat2_shape[1:] 165 | aug_mat2 = torch.reshape(mat2, aug_mat2_shape) 166 | return aug_mat1 * aug_mat2 167 | 168 | def add_const_col(mat: torch.Tensor): 169 | assert mat.dim() == 2 170 | n_data = mat.size()[0] 171 | device = mat.device 172 | return torch.cat([mat, torch.ones((n_data, 1), device=device)], dim=1) 173 | 174 | ######### Monitor 175 | class DFLMonitor: 176 | train_data_t: TrainDataSetTorch 177 | test_data_t: TestDataSetTorch 178 | validation_data_t: TrainDataSetTorch 179 | 180 | def __init__(self, t_loss, y_loss, dump_folder, trainer): 181 | 182 | self.t_loss = t_loss 183 | self.y_loss = y_loss 184 | self.metrics = {"stage1_insample_loss": [], 185 | "stage1_outsample_loss": [], 186 | "stage2_insample_loss": [], 187 | "stage2_outsample_loss": [], 188 | "test_loss": []} 189 | 190 | self.dump_folder = dump_folder 191 | self.trainer = trainer 192 | 193 | ##################################################### begin: t_loss = 'bin' 194 | if self.t_loss == 'bin': 195 | self.val_best = 99999 196 | self.pred_ate_train_best = 99999 197 | self.pred_ate_test_best = 99999 198 | 199 | self.pred_ate_train_final = 99999 200 | self.pred_ate_test_final = 99999 201 | ##################################################### 202 | else: 203 | self.train_y_pred = None 204 | self.val_y_pred = None 205 | self.test_y_pred = None 206 | 207 | def configure_data(self, train_data_t: TrainDataSetTorch, 208 | test_data_t: TestDataSetTorch, 209 | validation_data_t: TrainDataSetTorch): 210 | 211 | self.train_data_t = train_data_t 212 | self.test_data_t = test_data_t 213 | self.validation_data_t = validation_data_t 214 | 215 | def record(self, verbose: int): 216 | self.trainer.treatment_net.train(False) 217 | if self.trainer.covariate_net is not None: 218 | self.trainer.covariate_net.train(False) 219 | 220 | n_train_data = self.train_data_t.treatment.size()[0] 221 | n_val_data = self.validation_data_t.treatment.size()[0] 222 | n_test_data = self.test_data_t.treatment.size()[0] 223 | with torch.no_grad(): 224 | treatment_train_feature = self.trainer.treatment_net(self.train_data_t.treatment) 225 | treatment_val_feature = self.trainer.treatment_net(self.validation_data_t.treatment) 226 | treatment_test_feature = self.trainer.treatment_net(self.test_data_t.treatment) 227 | 228 | covariate_train_feature = None 229 | covariate_val_feature = None 230 | covariate_test_feature = None 231 | if self.trainer.covariate_net is not None: 232 | covariate_train_feature = 
self.trainer.covariate_net(self.train_data_t.covariate) 233 | covariate_val_feature = self.trainer.covariate_net(self.validation_data_t.covariate) 234 | covariate_test_feature = self.trainer.covariate_net(self.test_data_t.covariate) 235 | 236 | # stage2 237 | feature = DFIVModel.augment_stage2_feature(treatment_train_feature, 238 | covariate_train_feature, 239 | self.trainer.add_intercept) 240 | 241 | weight = fit_linear(self.train_data_t.outcome, feature, self.trainer.lam) 242 | insample_pred = linear_reg_pred(feature, weight) 243 | if self.y_loss == 'bin': 244 | labels = logit(self.train_data_t.outcome) 245 | logits = logit(insample_pred) 246 | insample_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() / n_train_data 247 | else: 248 | insample_loss = torch.norm(self.train_data_t.outcome - insample_pred) ** 2 / n_train_data 249 | ############################################################################## mse == norm ???? 250 | # insample_loss = torch.norm(self.train_data_t.outcome - insample_pred) ** 2 / n_train_data 251 | ############################################################################## 252 | 253 | val_feature = DFIVModel.augment_stage2_feature(treatment_val_feature, 254 | covariate_val_feature, 255 | self.trainer.add_intercept) 256 | outsample_pred = linear_reg_pred(val_feature, weight) 257 | if self.y_loss == 'bin': 258 | labels = logit(self.validation_data_t.outcome) 259 | logits = logit(outsample_pred) 260 | outsample_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() / n_val_data 261 | else: 262 | outsample_loss = torch.norm(self.validation_data_t.outcome - outsample_pred) ** 2 / n_val_data 263 | 264 | # eval for test 265 | test_feature = DFIVModel.augment_stage2_feature(treatment_test_feature, 266 | covariate_test_feature, 267 | self.trainer.add_intercept) 268 | test_pred = linear_reg_pred(test_feature, weight) 269 | if self.y_loss == 'bin': 270 | labels = logit(self.test_data_t.structural) 271 | logits = logit(test_pred) 272 | test_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() / n_test_data 273 | else: 274 | test_loss = torch.norm(self.test_data_t.structural - test_pred) ** 2 / n_test_data 275 | 276 | if verbose >= 1: 277 | print(f"insample_loss:{insample_loss.item()}") 278 | print(f"outsample_loss:{outsample_loss.item()}") 279 | print(f"test_loss:{test_loss.item()}") 280 | 281 | 282 | ##################################################### begin: t_loss = 'bin' 283 | if self.t_loss == 'bin': 284 | treatment0_train_feature = self.trainer.treatment_net(self.train_data_t.treatment-self.train_data_t.treatment) 285 | treatment0_test_feature = self.trainer.treatment_net(self.test_data_t.treatment-self.test_data_t.treatment) 286 | treatment1_train_feature = self.trainer.treatment_net(self.train_data_t.treatment-self.train_data_t.treatment+1) 287 | treatment1_test_feature = self.trainer.treatment_net(self.test_data_t.treatment-self.test_data_t.treatment+1) 288 | 289 | test_feature1 = DFIVModel.augment_stage2_feature(treatment1_test_feature, 290 | covariate_test_feature, 291 | self.trainer.add_intercept) 292 | test_pred1 = linear_reg_pred(test_feature1, weight) 293 | test_feature0 = DFIVModel.augment_stage2_feature(treatment0_test_feature, 294 | covariate_test_feature, 295 | self.trainer.add_intercept) 296 | test_pred0 = linear_reg_pred(test_feature0, weight) 297 | 298 | train_feature1 = DFIVModel.augment_stage2_feature(treatment1_train_feature, 299 | covariate_train_feature, 300 | 
self.trainer.add_intercept) 301 | train_pred1 = linear_reg_pred(train_feature1, weight) 302 | train_feature0 = DFIVModel.augment_stage2_feature(treatment0_train_feature, 303 | covariate_train_feature, 304 | self.trainer.add_intercept) 305 | train_pred0 = linear_reg_pred(train_feature0, weight) 306 | 307 | if outsample_loss < self.val_best: 308 | print(f"val_best from {self.val_best} to {outsample_loss}.") 309 | self.val_best = outsample_loss 310 | 311 | self.pred_ate_test_best = test_pred1.mean() - test_pred0.mean() 312 | self.pred_ate_train_best = train_pred1.mean() - train_pred0.mean() 313 | 314 | print(f"train_ate_best: {self.pred_ate_train_best.item()}; test_ate_best: {self.pred_ate_test_best.item()}.") 315 | 316 | self.pred_ate_test_final = test_pred1.mean() - test_pred0.mean() 317 | self.pred_ate_train_final = train_pred1.mean() - train_pred0.mean() 318 | 319 | print(f"train_ate_final: {self.pred_ate_train_final.item()}; test_ate_final: {self.pred_ate_test_final.item()}.") 320 | 321 | ##################################################### 322 | else: 323 | self.train_y_pred = [insample_pred, self.train_data_t.outcome] 324 | self.val_y_pred = [outsample_pred, self.validation_data_t.outcome] 325 | self.test_y_pred = [test_pred, self.test_data_t.structural] 326 | 327 | ########## DFLModel 328 | class DFLModel: 329 | weight_mat: torch.Tensor 330 | 331 | def __init__(self, 332 | treatment_net: nn.Module, 333 | covariate_net: Optional[nn.Module], 334 | add_intercept: bool, 335 | device: str 336 | ): 337 | self.treatment_net = treatment_net 338 | self.covariate_net = covariate_net 339 | self.add_intercept = add_intercept 340 | self.device = device 341 | 342 | @staticmethod 343 | def augment_feature(treatment_feature: torch.Tensor, 344 | covariate_feature: Optional[torch.Tensor], 345 | add_intercept: bool): 346 | feature = treatment_feature 347 | if add_intercept: 348 | feature = add_const_col(feature) 349 | 350 | if covariate_feature is not None: 351 | feature_tmp = covariate_feature 352 | if add_intercept: 353 | feature_tmp = add_const_col(feature_tmp) 354 | feature = outer_prod(feature, feature_tmp) 355 | feature = torch.flatten(feature, start_dim=1) 356 | 357 | return feature 358 | 359 | @staticmethod 360 | def fit_dfl(treatment_feature: torch.Tensor, 361 | covariate_feature: Optional[torch.Tensor], 362 | outcome_t: torch.Tensor, 363 | lam: float, add_intercept: bool 364 | ): 365 | 366 | # stage1 367 | feature = DFLModel.augment_feature(treatment_feature, 368 | covariate_feature, 369 | add_intercept) 370 | 371 | weight = fit_linear(outcome_t, feature, lam) 372 | pred = linear_reg_pred(feature, weight) 373 | loss = torch.norm((outcome_t - pred)) ** 2 + lam * torch.norm(weight) ** 2 374 | 375 | labels = logit(outcome_t) 376 | logits = logit(pred) 377 | log_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() + lam * torch.norm(weight) ** 2 378 | 379 | return dict(weight=weight, loss=loss, log_loss=log_loss) 380 | 381 | def fit_t(self, train_data_t: TrainDataSetTorch, lam: float): 382 | treatment_feature = self.treatment_net(train_data_t.treatment) 383 | outcome_t = train_data_t.outcome 384 | covariate_feature = None 385 | if self.covariate_net is not None: 386 | covariate_feature = self.covariate_net(train_data_t.covariate) 387 | 388 | res = DFLModel.fit_dfl(treatment_feature, covariate_feature, outcome_t, lam, self.add_intercept) 389 | self.weight_mat = res["weight"] 390 | 391 | def fit(self, train_data: TrainDataSet, lam: float): 392 | train_data_t = 
TrainDataSetTorch.from_numpy(train_data) 393 | self.fit_t(train_data_t, lam) 394 | 395 | def predict_t(self, treatment: torch.Tensor, covariate: Optional[torch.Tensor]): 396 | treatment_feature = self.treatment_net(treatment) 397 | covariate_feature = None 398 | if self.covariate_net: 399 | covariate_feature = self.covariate_net(covariate) 400 | 401 | feature = DFLModel.augment_feature(treatment_feature, covariate_feature, self.add_intercept) 402 | return linear_reg_pred(feature, self.weight_mat) 403 | 404 | def predict(self, treatment: np.ndarray, covariate: Optional[np.ndarray]): 405 | treatment_t = torch.tensor(treatment, dtype=torch.float32).to(self.device) 406 | covariate_t = None 407 | if covariate is not None: 408 | covariate_t = torch.tensor(covariate, dtype=torch.float32).to(self.device) 409 | return self.predict_t(treatment_t, covariate_t).data.detach().cpu().numpy() 410 | 411 | def evaluate_t(self, y_loss: str, test_data: TestDataSetTorch): 412 | target = test_data.structural 413 | with torch.no_grad(): 414 | pred = self.predict_t(test_data.treatment, test_data.covariate) 415 | if y_loss == 'bin': 416 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 417 | else: 418 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 419 | 420 | def evaluate(self, y_loss: str, test_data: TestDataSet): 421 | return self.evaluate_t(y_loss, TestDataSetTorch.from_numpy(test_data)).data.detach().cpu().item() 422 | 423 | class DFIVModel: 424 | stage1_weight: torch.Tensor 425 | stage2_weight: torch.Tensor 426 | 427 | def __init__(self, 428 | treatment_net: nn.Module, 429 | instrumental_net: nn.Module, 430 | covariate_net: Optional[nn.Module], 431 | add_stage1_intercept: bool, 432 | add_stage2_intercept: bool 433 | ): 434 | self.treatment_net = treatment_net 435 | self.instrumental_net = instrumental_net 436 | self.covariate_net = covariate_net 437 | self.add_stage1_intercept = add_stage1_intercept 438 | self.add_stage2_intercept = add_stage2_intercept 439 | 440 | @staticmethod 441 | def augment_stage1_feature(instrumental_feature: torch.Tensor, 442 | add_stage1_intercept: bool): 443 | 444 | feature = instrumental_feature 445 | if add_stage1_intercept: 446 | feature = add_const_col(feature) 447 | return feature 448 | 449 | @staticmethod 450 | def augment_stage2_feature(predicted_treatment_feature: torch.Tensor, 451 | covariate_feature: Optional[torch.Tensor], 452 | add_stage2_intercept: bool): 453 | feature = predicted_treatment_feature 454 | if add_stage2_intercept: 455 | feature = add_const_col(feature) 456 | 457 | if covariate_feature is not None: 458 | feature_tmp = covariate_feature 459 | if add_stage2_intercept: 460 | feature_tmp = add_const_col(feature_tmp) 461 | feature = outer_prod(feature, feature_tmp) 462 | feature = torch.flatten(feature, start_dim=1) 463 | 464 | return feature 465 | 466 | @staticmethod 467 | def fit_2sls(treatment_1st_feature: torch.Tensor, 468 | instrumental_1st_feature: torch.Tensor, 469 | instrumental_2nd_feature: torch.Tensor, 470 | covariate_2nd_feature: Optional[torch.Tensor], 471 | outcome_2nd_t: torch.Tensor, 472 | lam1: float, lam2: float, 473 | add_stage1_intercept: bool, 474 | add_stage2_intercept: bool, 475 | ): 476 | 477 | # stage1 478 | feature = DFIVModel.augment_stage1_feature(instrumental_1st_feature, add_stage1_intercept) 479 | stage1_weight = fit_linear(treatment_1st_feature, feature, lam1) 480 | 481 | # predicting for stage 2 482 | feature = DFIVModel.augment_stage1_feature(instrumental_2nd_feature, 483 | add_stage1_intercept) 484 
| predicted_treatment_feature = linear_reg_pred(feature, stage1_weight) 485 | 486 | # stage2 487 | feature = DFIVModel.augment_stage2_feature(predicted_treatment_feature, 488 | covariate_2nd_feature, 489 | add_stage2_intercept) 490 | 491 | stage2_weight = fit_linear(outcome_2nd_t, feature, lam2) 492 | pred = linear_reg_pred(feature, stage2_weight) 493 | stage2_loss = torch.norm((outcome_2nd_t - pred)) ** 2 + lam2 * torch.norm(stage2_weight) ** 2 494 | 495 | labels = logit(outcome_2nd_t) 496 | logits = logit(pred) 497 | stage2_log_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() + lam2 * torch.norm(stage2_weight) ** 2 498 | 499 | return dict(stage1_weight=stage1_weight, 500 | predicted_treatment_feature=predicted_treatment_feature, 501 | stage2_weight=stage2_weight, 502 | stage2_loss=stage2_loss, 503 | stage2_log_loss=stage2_log_loss) 504 | 505 | def fit_t(self, 506 | train_1st_data_t: TrainDataSetTorch, 507 | train_2nd_data_t: TrainDataSetTorch, 508 | lam1: float, lam2: float): 509 | 510 | treatment_1st_feature = self.treatment_net(train_1st_data_t.treatment) 511 | instrumental_1st_feature = self.instrumental_net(train_1st_data_t.instrumental) 512 | instrumental_2nd_feature = self.instrumental_net(train_2nd_data_t.instrumental) 513 | outcome_2nd_t = train_2nd_data_t.outcome 514 | covariate_2nd_feature = None 515 | if self.covariate_net is not None: 516 | covariate_2nd_feature = self.covariate_net(train_2nd_data_t.covariate) 517 | 518 | res = DFIVModel.fit_2sls(treatment_1st_feature, 519 | instrumental_1st_feature, 520 | instrumental_2nd_feature, 521 | covariate_2nd_feature, 522 | outcome_2nd_t, 523 | lam1, lam2, 524 | self.add_stage1_intercept, 525 | self.add_stage2_intercept) 526 | 527 | self.stage1_weight = res["stage1_weight"] 528 | self.stage2_weight = res["stage2_weight"] 529 | 530 | def fit(self, train_1st_data: TrainDataSet, train_2nd_data: TrainDataSet, lam1: float, lam2: float): 531 | train_1st_data_t = TrainDataSetTorch.from_numpy(train_1st_data) 532 | train_2nd_data_t = TrainDataSetTorch.from_numpy(train_2nd_data) 533 | self.fit_t(train_1st_data_t, train_2nd_data_t, lam1, lam2) 534 | 535 | def predict_t(self, treatment: torch.Tensor, covariate: Optional[torch.Tensor]): 536 | treatment_feature = self.treatment_net(treatment) 537 | covariate_feature = None 538 | if self.covariate_net: 539 | covariate_feature = self.covariate_net(covariate) 540 | 541 | feature = DFIVModel.augment_stage2_feature(treatment_feature, 542 | covariate_feature, 543 | self.add_stage2_intercept) 544 | return linear_reg_pred(feature, self.stage2_weight) 545 | 546 | def predict(self, treatment: np.ndarray, covariate: Optional[np.ndarray]): 547 | treatment_t = torch.tensor(treatment, dtype=torch.float32) 548 | covariate_t = None 549 | if covariate is not None: 550 | covariate_t = torch.tensor(covariate, dtype=torch.float32) 551 | return self.predict_t(treatment_t, covariate_t).data.numpy() 552 | 553 | def evaluate_t(self, y_loss: str, test_data: TestDataSetTorch): 554 | target = test_data.structural 555 | with torch.no_grad(): 556 | pred = self.predict_t(test_data.treatment, test_data.covariate) 557 | if y_loss == 'bin': 558 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 559 | else: 560 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 561 | 562 | def evaluate(self, y_loss: str, test_data: TestDataSet): 563 | return self.evaluate_t(y_loss, TestDataSetTorch.from_numpy(test_data)).data.item() 564 | 565 | ########## TrainerATE 566 | class 
DFLTrainer(object): 567 | 568 | def __init__(self, t_loss: str, y_loss: str, data_list: List, net_list: List, train_params: Dict[str, Any], 569 | gpu_flg: bool = False, dump_folder: Optional[Path] = None): 570 | self.t_loss = t_loss 571 | self.y_loss = y_loss 572 | 573 | self.data_list = data_list 574 | 575 | if gpu_flg and torch.cuda.is_available(): 576 | self.device = "cuda:0" 577 | else: 578 | self.device = "cpu" 579 | 580 | # configure training params 581 | self.epochs: int = train_params["epochs"] 582 | self.treatment_weight_decay = train_params["treatment_weight_decay"] 583 | self.covariate_weight_decay = train_params["covariate_weight_decay"] 584 | self.lam: float = train_params["lam"] 585 | self.n_iter_treatment = train_params["n_iter_treatment"] 586 | self.n_iter_covariate = train_params["n_iter_covariate"] 587 | self.add_intercept: bool = train_params["add_intercept"] 588 | 589 | # build networks 590 | networks = net_list 591 | self.treatment_net: nn.Module = networks[0] 592 | self.covariate_net: Optional[nn.Module] = networks[2] 593 | 594 | self.treatment_net.to(self.device) 595 | if self.covariate_net is not None: 596 | self.covariate_net.to(self.device) 597 | 598 | self.treatment_opt = torch.optim.Adam(self.treatment_net.parameters(), 599 | weight_decay=self.treatment_weight_decay) 600 | if self.covariate_net: 601 | self.covariate_opt = torch.optim.Adam(self.covariate_net.parameters(), 602 | weight_decay=self.covariate_weight_decay) 603 | self.monitor = None 604 | if dump_folder is not None: 605 | self.monitor = DFLMonitor(t_loss, y_loss, dump_folder, self) 606 | 607 | def train(self, rand_seed: int = 42, verbose: int = 0, epoch_show: int = 20) -> float: 608 | """ 609 | 610 | Parameters 611 | ---------- 612 | rand_seed: int 613 | random seed 614 | verbose : int 615 | Determine the level of logging 616 | Returns 617 | ------- 618 | oos_result : float 619 | The performance of model evaluated by oos 620 | """ 621 | train_data = self.data_list[0] 622 | test_data = self.data_list[2] 623 | train_data_t = TrainDataSetTorch.from_numpy(train_data) 624 | test_data_t = TestDataSetTorch.from_numpy(test_data) 625 | train_data_t = train_data_t.to(self.device) 626 | test_data_t = test_data_t.to(self.device) 627 | 628 | if self.monitor is not None: 629 | validation_data = self.data_list[1] 630 | validation_data_t = TrainDataSetTorch.from_numpy(validation_data) 631 | validation_data_t = validation_data_t.to(self.device) 632 | self.monitor.configure_data(train_data_t, test_data_t, validation_data_t) 633 | 634 | self.lam *= train_data_t[0].size()[0] 635 | 636 | for t in range(self.epochs): 637 | self.update_treatment(train_data_t, verbose) 638 | if self.covariate_net: 639 | self.update_covariate_net(train_data_t, verbose) 640 | 641 | if t % epoch_show == 0 or t == self.epochs - 1: 642 | if verbose >= 1: 643 | print(f"Epoch {t} ended") 644 | if self.monitor is not None: 645 | self.monitor.record(verbose) 646 | 647 | mdl = DFLModel(self.treatment_net, self.covariate_net, self.add_intercept, self.device) 648 | mdl.fit_t(train_data_t, self.lam) 649 | torch.cuda.empty_cache() 650 | 651 | oos_loss: float = mdl.evaluate_t(self.y_loss, test_data_t).data.item() 652 | if verbose >= 1: 653 | print(f"test_loss:{oos_loss}") 654 | return oos_loss, mdl 655 | 656 | def update_treatment(self, train_data_t, verbose): 657 | 658 | self.treatment_net.train(True) 659 | if self.covariate_net: 660 | self.covariate_net.train(False) 661 | 662 | # have covariate features 663 | covariate_feature = None 664 | if 
self.covariate_net: 665 | covariate_feature = self.covariate_net(train_data_t.covariate).detach() 666 | 667 | for i in range(self.n_iter_treatment): 668 | self.treatment_opt.zero_grad() 669 | treatment_feature = self.treatment_net(train_data_t.treatment) 670 | res = DFLModel.fit_dfl(treatment_feature, covariate_feature, train_data_t.outcome, 671 | self.lam, self.add_intercept) 672 | if self.y_loss == 'bin': 673 | loss = res["log_loss"] 674 | else: 675 | loss = res["loss"] 676 | loss.backward() 677 | if verbose >= 2: 678 | print(f"treatment learning: {loss.item()}") 679 | self.treatment_opt.step() 680 | 681 | def update_covariate_net(self, train_data_t: TrainDataSetTorch, verbose: int): 682 | self.treatment_net.train(False) 683 | treatment_feature = self.treatment_net(train_data_t.treatment).detach() 684 | self.covariate_net.train(True) 685 | for i in range(self.n_iter_covariate): 686 | self.covariate_opt.zero_grad() 687 | covariate_feature = self.covariate_net(train_data_t.covariate) 688 | res = DFLModel.fit_dfl(treatment_feature, covariate_feature, train_data_t.outcome, 689 | self.lam, self.add_intercept) 690 | if self.y_loss == 'bin': 691 | loss = res["log_loss"] 692 | else: 693 | loss = res["loss"] 694 | loss.backward() 695 | if verbose >= 2: 696 | print(f"update covariate: {loss.item()}") 697 | self.covariate_opt.step() 698 | 699 | 700 | def build_net(t_input_dim, z_input_dim, x_input_dim): 701 | treatment_net = nn.Sequential(nn.Linear(t_input_dim, 16), 702 | nn.ReLU(), 703 | nn.Linear(16, 1)) 704 | 705 | instrumental_net = nn.Sequential(nn.Linear(z_input_dim, 128), 706 | nn.ReLU(), 707 | nn.Linear(128, 64), 708 | nn.ReLU(), 709 | nn.Linear(64, 32), 710 | nn.BatchNorm1d(32)) 711 | 712 | covariate_net = nn.Sequential(nn.Linear(x_input_dim, 128), 713 | nn.ReLU(), 714 | nn.Linear(128, 32), 715 | nn.BatchNorm1d(32), 716 | nn.ReLU(), 717 | nn.Linear(32, 16), 718 | nn.ReLU()) 719 | 720 | return treatment_net, instrumental_net, covariate_net 721 | 722 | class DFL(object): 723 | def __init__(self) -> None: 724 | self.config = { 725 | 'methodName': 'DFL', 726 | 't_loss': 'cont', 727 | 'y_loss': 'cont', 728 | "epochs": 100, 729 | "lam": 0.1, 730 | 'n_iter_treatment': 20, 731 | 'n_iter_covariate': 20, 732 | 'treatment_weight_decay': 0.0, 733 | 'covariate_weight_decay': 0.1, 734 | "add_intercept": True, 735 | 'epoch_show': 10, 736 | 'verbose': 0, 737 | 'use_gpu': True, 738 | 'seed': 2022, 739 | } 740 | 741 | def set_Configuration(self, config): 742 | self.config = config 743 | 744 | def fit(self, data, exp=-1, config=None): 745 | if config is None: 746 | config = self.config 747 | 748 | train_config = {"epochs": config["epochs"], 749 | "lam": config["lam"], 750 | 'n_iter_treatment': config['n_iter_treatment'], 751 | 'n_iter_covariate': config['n_iter_covariate'], 752 | 'treatment_weight_decay': config['treatment_weight_decay'], 753 | 'covariate_weight_decay': config['covariate_weight_decay'], 754 | "add_intercept": config["add_intercept"], 755 | } 756 | 757 | set_seed(config['seed']) 758 | data.numpy() 759 | 760 | self.z_dim = data.train.z.shape[1] 761 | self.x_dim = data.train.x.shape[1] 762 | self.t_dim = data.train.t.shape[1] 763 | 764 | t_input_dim = self.t_dim 765 | z_input_dim = self.z_dim + self.x_dim 766 | x_input_dim = self.x_dim 767 | 768 | train_z = np.concatenate((data.train.z,data.train.x),1) 769 | train_x = data.train.x 770 | val_z = np.concatenate((data.valid.z,data.valid.x),1) 771 | val_x = data.valid.x 772 | test_z = np.concatenate((data.test.z,data.test.x),1) 773 | test_x = 
data.test.x
774 | 
775 |         if config['t_loss'] == 'bin':
776 |             train_t = data.train.t
777 |             train_t[train_t==0] = -6.9068 # ln(1/999), y = 0.001
778 |             train_t[train_t==1] = 6.9068  # ln(999), y = 0.999
779 |             val_t = data.valid.t
780 |             val_t[val_t==0] = -6.9068 # ln(1/999), y = 0.001
781 |             val_t[val_t==1] = 6.9068  # ln(999), y = 0.999
782 |             test_t = data.test.t
783 |             test_t[test_t==0] = -6.9068 # ln(1/999), y = 0.001
784 |             test_t[test_t==1] = 6.9068  # ln(999), y = 0.999
785 |         else:
786 |             train_t = data.train.t
787 |             val_t = data.valid.t
788 |             test_t = data.test.t
789 | 
790 |         if config['y_loss'] == 'bin':
791 |             train_y = data.train.y
792 |             train_y[train_y==0] = -6.9068 # ln(1/999), y = 0.001
793 |             train_y[train_y==1] = 6.9068  # ln(999), y = 0.999
794 |             val_y = data.valid.y
795 |             val_y[val_y==0] = -6.9068 # ln(1/999), y = 0.001
796 |             val_y[val_y==1] = 6.9068  # ln(999), y = 0.999
797 |             test_y = data.test.y
798 |             test_y[test_y==0] = -6.9068 # ln(1/999), y = 0.001
799 |             test_y[test_y==1] = 6.9068  # ln(999), y = 0.999
800 |         else:
801 |             train_y = data.train.y
802 |             val_y = data.valid.y
803 |             test_y = data.test.y
804 | 
805 |         train_data = TrainDataSet(treatment=train_t,
806 |                                   instrumental=train_z,
807 |                                   covariate=train_x,
808 |                                   outcome=train_y,
809 |                                   structural=train_y)
810 |         val_data = TrainDataSet(treatment=val_t,
811 |                                 instrumental=val_z,
812 |                                 covariate=val_x,
813 |                                 outcome=val_y,
814 |                                 structural=val_y)
815 |         test_data = TestDataSet(treatment=test_t,
816 |                                 instrumental=test_z,
817 |                                 covariate=test_x,
818 |                                 structural=test_y,
819 |                                 outcome=test_y)
820 |         data_list = [train_data, val_data, test_data]
821 | 
822 |         treatment_net, instrumental_net, covariate_net = build_net(t_input_dim, z_input_dim, x_input_dim)
823 |         net_list = [treatment_net, None, covariate_net]
824 | 
825 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
826 | 
827 |         trainer = DFLTrainer(config['t_loss'], config['y_loss'], data_list, net_list, train_config, config['use_gpu'], './tmp/')
828 |         test_loss, mdl = trainer.train(rand_seed=config['seed'], verbose=config['verbose'], epoch_show=config['epoch_show'])
829 | 
830 |         def estimation(data):
831 |             return mdl.predict_t(data.t-data.t, data.x), mdl.predict_t(data.t, data.x)
832 | 
833 |         print('End. 
' + '-'*20) 834 | 835 | self.mdl = mdl 836 | self.estimation = estimation 837 | 838 | def predict(self, data=None, t=None, x=None): 839 | if data is None: 840 | data = self.data.test 841 | 842 | if x is None: 843 | x = data.x 844 | 845 | if t is None: 846 | t = data.t 847 | 848 | with torch.no_grad(): 849 | pred = self.mdl.predict(t, x) 850 | 851 | return pred 852 | 853 | def ITE(self, data=None, t=None, x=None): 854 | if data is None: 855 | data = self.data.test 856 | 857 | if x is None: 858 | x = data.x 859 | 860 | if t is None: 861 | t = data.t 862 | 863 | ITE_0 = self.mdl.predict(t-t,x) 864 | ITE_1 = self.mdl.predict(t-t+1,x) 865 | ITE_t = self.mdl.predict(t,x) 866 | 867 | return ITE_0,ITE_1,ITE_t 868 | 869 | def ATE(self, data=None, t=None, x=None): 870 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 871 | 872 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 873 | 874 | -------------------------------------------------------------------------------- /mliv/inference/gmm/__init__.py: -------------------------------------------------------------------------------- 1 | from .agmm_v1 import AGMM 2 | from .deepgmm_v1 import DeepGMM -------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainer import AGMM 2 | 3 | example = ''' 4 | from mliv.inference import AGMM 5 | 6 | model = AGMM() 7 | model.fit(data) 8 | ITE = model.predict(data.train) 9 | ATE,_ = model.ATE(data.train) 10 | ''' -------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tempfile 4 | import torch 5 | import torch.nn as nn 6 | from torch.utils.data import DataLoader, TensorDataset 7 | from .oadam import OAdam 8 | from .rbflayer import RBF 9 | 10 | # TODO. This epsilon is used only because pytorch 1.5 has an instability in torch.cdist 11 | # when the input distance is close to zero, due to instability of the square root in 12 | # automatic differentiation. Should be removed once pytorch fixes the instability. 13 | # It can be set to 0 if using pytorch 1.4.0 14 | EPSILON = 1e-2 15 | 16 | 17 | def add_weight_decay(net, l2_value, skip_list=()): 18 | decay, no_decay = [], [] 19 | for name, param in net.named_parameters(): 20 | if not param.requires_grad: 21 | continue # frozen weights 22 | if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list: 23 | no_decay.append(param) 24 | else: 25 | decay.append(param) 26 | return [{'params': no_decay, 'weight_decay': 0.}, {'params': decay, 'weight_decay': l2_value}] 27 | 28 | 29 | def _kernel(x, y, basis_func, sigma): 30 | return basis_func(torch.cdist(x, y + EPSILON) * torch.abs(sigma)) 31 | 32 | 33 | class _BaseAGMM: 34 | 35 | def _pretrain(self, Z, T, Y, 36 | learner_l2, adversary_l2, adversary_norm_reg, 37 | learner_lr, adversary_lr, n_epochs, bs, train_learner_every, train_adversary_every, 38 | warm_start, results_dir, device, verbose, add_sample_inds=False): 39 | """ Prepares the variables required to begin training. 
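        Specifically: creates the checkpoint directory ('agmm_model') and the
        tensorboard directory under results_dir, wraps (Z, T, Y) in a shuffled
        DataLoader of batch size bs, moves learner and adversary to device,
        re-initializes their parameters when warm_start is False, and builds
        the two OAdam optimizers used by the min-max training loop.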
40 |         """
41 |         self.verbose = verbose
42 | 
43 |         model_dir = results_dir+'agmm_model'
44 |         if not os.path.exists(model_dir):
45 |             os.makedirs(model_dir)
46 |         self.tempdir = tempfile.TemporaryDirectory(dir=model_dir)
47 |         self.model_dir = self.tempdir.name
48 | 
49 |         tfboard_dir = results_dir+'agmm_tfboard'
50 |         os.makedirs(os.path.dirname(tfboard_dir), exist_ok=True)
51 |         self.tfboard_dir = tfboard_dir
52 | 
53 |         self.n_epochs = n_epochs
54 | 
55 |         if add_sample_inds:
56 |             sample_inds = torch.tensor(np.arange(Y.shape[0]))
57 |             self.train_ds = TensorDataset(Z, T, Y, sample_inds)
58 |         else:
59 |             self.train_ds = TensorDataset(Z, T, Y)
60 |         self.train_dl = DataLoader(self.train_ds, batch_size=bs, shuffle=True)
61 | 
62 |         self.learner = self.learner.to(device)
63 |         self.adversary = self.adversary.to(device)
64 | 
65 |         if not warm_start:
66 |             self.learner.apply(lambda m: (
67 |                 m.reset_parameters() if hasattr(m, 'reset_parameters') else None))
68 |             self.adversary.apply(lambda m: (
69 |                 m.reset_parameters() if hasattr(m, 'reset_parameters') else None))
70 | 
71 |         beta1 = 0.
72 |         self.optimizerD = OAdam(add_weight_decay(self.learner, learner_l2),
73 |                                 lr=learner_lr, betas=(beta1, .01))
74 |         self.optimizerG = OAdam(add_weight_decay(
75 |             self.adversary, adversary_l2, skip_list=self.skip_list), lr=adversary_lr, betas=(beta1, .01))
76 | 
77 |         return Z, T, Y
78 | 
79 |     def predict(self, T, model='avg', burn_in=0, alpha=None):
80 |         """
81 |         Parameters
82 |         ----------
83 |         T : treatments
84 |         model : one of ('avg', 'final') or an int epoch index; use the average of the saved models, the final model, or the model saved at that epoch
85 |         burn_in : discard the first "burn_in" epochs when doing averaging
86 |         alpha : if not None but a float, then also return the alpha/2 and 1-alpha/2 percentiles of
87 |             the predictions across different epochs (proxy for a confidence interval)
88 |         """
89 |         if model == 'avg':
90 |             preds = np.array([torch.load(os.path.join(self.model_dir,"epoch{}".format(i)))(T).cpu().data.numpy()
91 |                               for i in np.arange(burn_in, self.n_epochs)])
92 |             if alpha is None:
93 |                 return np.mean(preds, axis=0)
94 |             else:
95 |                 return np.mean(preds, axis=0), np.percentile(preds, 100 * alpha / 2, axis=0), np.percentile(preds, 100 * (1 - alpha / 2), axis=0)
96 |         if model == 'final':
97 |             return torch.load(os.path.join(self.model_dir,"epoch{}".format(self.n_epochs - 1)))(T).cpu().data.numpy()
98 |         if isinstance(model, int):
99 |             return torch.load(os.path.join(self.model_dir,"epoch{}".format(model)))(T).cpu().data.numpy()
100 | 
101 | 
102 | class _BaseSupLossAGMM(_BaseAGMM):
103 | 
104 |     def fit(self, Z, T, Y, Z_val, T_val, Y_val, T_test_tens, G_val,
105 |             learner_l2=1e-3, adversary_l2=1e-4, adversary_norm_reg=1e-3,
106 |             learner_lr=0.001, adversary_lr=0.001, n_epochs=100, bs=100, train_learner_every=1, train_adversary_every=1,
107 |             ols_weight=0., warm_start=False, results_dir='.', device=None, verbose=0):
108 |         """
109 |         Parameters
110 |         ----------
111 |         Z : instruments
112 |         T : treatments
113 |         Y : outcome
114 |         learner_l2, adversary_l2 : l2 regularization of parameters of learner and adversary
115 |         adversary_norm_reg : adversary norm regularization weight
116 |         learner_lr : learning rate of the Adam optimizer for learner
117 |         adversary_lr : learning rate of the Adam optimizer for adversary
118 |         n_epochs : how many passes over the data
119 |         bs : batch size
120 |         train_learner_every, train_adversary_every : train the learner (resp. the adversary) once every this many minibatch iterations of the other player
121 |         ols_weight : weight on OLS (square loss) objective
122 |         warm_start : if False then network parameters 
are initialized at the beginning, otherwise we start 123 | from their current weights 124 | results_dir : folder where to store the learned models after every epoch 125 | """ 126 | 127 | Z, T, Y = self._pretrain(Z, T, Y, 128 | learner_l2, adversary_l2, adversary_norm_reg, 129 | learner_lr, adversary_lr, n_epochs, bs, train_learner_every, train_adversary_every, 130 | warm_start, results_dir, device, verbose) 131 | 132 | for epoch in range(n_epochs): 133 | 134 | if self.verbose > 0: 135 | print("Epoch #", epoch, sep="") 136 | 137 | for it, (zb, xb, yb) in enumerate(self.train_dl): 138 | 139 | zb, xb, yb = map(lambda x: x.to(device), (zb, xb, yb)) 140 | 141 | if (it % train_learner_every == 0): 142 | self.learner.train() 143 | pred = self.learner(xb) 144 | test = self.adversary(zb) 145 | D_loss = torch.mean((yb - pred) * test) + ols_weight * torch.mean((yb - pred)**2) 146 | self.optimizerD.zero_grad() 147 | D_loss.backward() 148 | self.optimizerD.step() 149 | self.learner.eval() 150 | 151 | if (it % train_adversary_every == 0): 152 | self.adversary.train() 153 | pred = self.learner(xb) 154 | reg = 0 155 | if self.adversary_reg: 156 | test, reg = self.adversary(zb, reg=True) 157 | else: 158 | test = self.adversary(zb) 159 | G_loss = - torch.mean((yb - pred) * test) + torch.mean(test**2) 160 | G_loss += adversary_norm_reg * reg 161 | self.optimizerG.zero_grad() 162 | G_loss.backward() 163 | self.optimizerG.step() 164 | self.adversary.eval() 165 | 166 | torch.save(self.learner, os.path.join(self.model_dir, "epoch{}".format(epoch))) 167 | 168 | return self 169 | 170 | 171 | class AGMM_Net(_BaseSupLossAGMM): 172 | 173 | def __init__(self, learner, adversary): 174 | """ 175 | Parameters 176 | ---------- 177 | learner : a pytorch neural net module 178 | adversary : a pytorch neural net module 179 | """ 180 | self.learner = learner 181 | self.adversary = adversary 182 | # whether we have a norm penalty for the adversary 183 | self.adversary_reg = False 184 | # which adversary parameters to not ell2 penalize 185 | self.skip_list = [] 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/oadam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Forked from the repository: 6 | https://github.com/georgepar/optimistic-adam 7 | By George Paraskevopoulos on April 15, 2020 8 | """ 9 | import math 10 | import torch 11 | from torch.optim import Optimizer 12 | 13 | 14 | class OAdam(Optimizer): 15 | """Implements optimistic Adam algorithm. 16 | 17 | It has been proposed in `Training GANs with Optimism`_. 18 | 19 | Arguments: 20 | params (iterable): iterable of parameters to optimize or dicts defining 21 | parameter groups 22 | lr (float, optional): learning rate (default: 1e-3) 23 | betas (Tuple[float, float], optional): coefficients used for computing 24 | running averages of gradient and its square (default: (0.9, 0.999)) 25 | eps (float, optional): term added to the denominator to improve 26 | numerical stability (default: 1e-8) 27 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 28 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this 29 | algorithm from the paper `On the Convergence of Adam and Beyond`_ 30 | (default: False) 31 | 32 | .. 
_Training GANs with Optimism:
33 |         https://arxiv.org/abs/1711.00141
34 |     """
35 | 
36 |     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
37 |                  weight_decay=0, amsgrad=False):
38 |         if not 0.0 <= lr:
39 |             raise ValueError("Invalid learning rate: {}".format(lr))
40 |         if not 0.0 <= eps:
41 |             raise ValueError("Invalid epsilon value: {}".format(eps))
42 |         if not 0.0 <= betas[0] < 1.0:
43 |             raise ValueError(
44 |                 "Invalid beta parameter at index 0: {}".format(betas[0]))
45 |         if not 0.0 <= betas[1] < 1.0:
46 |             raise ValueError(
47 |                 "Invalid beta parameter at index 1: {}".format(betas[1]))
48 |         defaults = dict(lr=lr, betas=betas, eps=eps,
49 |                         weight_decay=weight_decay, amsgrad=amsgrad)
50 |         super(OAdam, self).__init__(params, defaults)
51 | 
52 |     def __setstate__(self, state):
53 |         super(OAdam, self).__setstate__(state)
54 |         for group in self.param_groups:
55 |             group.setdefault('amsgrad', False)
56 | 
57 |     def step(self, closure=None):
58 |         """Performs a single optimization step.
59 | 
60 |         Arguments:
61 |             closure (callable, optional): A closure that reevaluates the model
62 |                 and returns the loss.
63 |         """
64 |         loss = None
65 |         if closure is not None:
66 |             loss = closure()
67 | 
68 |         for group in self.param_groups:
69 |             for p in group['params']:
70 |                 if p.grad is None:
71 |                     continue
72 |                 grad = p.grad.data
73 |                 if grad.is_sparse:
74 |                     raise RuntimeError(
75 |                         'Adam does not support sparse gradients, please consider SparseAdam instead')
76 |                 amsgrad = group['amsgrad']
77 | 
78 |                 state = self.state[p]
79 | 
80 |                 # State initialization
81 |                 if len(state) == 0:
82 |                     state['step'] = 0
83 |                     # Exponential moving average of gradient values
84 |                     state['exp_avg'] = torch.zeros_like(p.data)
85 |                     # Exponential moving average of squared gradient values
86 |                     state['exp_avg_sq'] = torch.zeros_like(p.data)
87 |                     if amsgrad:
88 |                         # Maintains max of all exp. moving avg. of sq. grad. values
89 |                         state['max_exp_avg_sq'] = torch.zeros_like(p.data)
90 | 
91 |                 exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
92 |                 if amsgrad:
93 |                     max_exp_avg_sq = state['max_exp_avg_sq']
94 |                 beta1, beta2 = group['betas']
95 | 
96 |                 state['step'] += 1
97 | 
98 |                 if group['weight_decay'] != 0:
99 |                     grad.add_(group['weight_decay'], p.data)
100 | 
101 |                 bias_correction1 = 1 - beta1 ** state['step']
102 |                 bias_correction2 = 1 - beta2 ** state['step']
103 |                 step_size = group['lr'] * \
104 |                     math.sqrt(bias_correction2) / bias_correction1
105 | 
106 |                 # Optimistic update: first re-apply the previous step using the stale moments
107 |                 p.data.addcdiv_(step_size, exp_avg,
108 |                                 exp_avg_sq.sqrt().add(group['eps']))
109 | 
110 |                 # Decay the first and second moment running average coefficient
111 |                 exp_avg.mul_(beta1).add_(1 - beta1, grad)
112 |                 exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
113 |                 if amsgrad:
114 |                     # Maintains the maximum of all 2nd moment running avg. till now
115 |                     torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
116 |                     # Use the max. for normalizing running avg. of gradient
117 |                     denom = max_exp_avg_sq.sqrt().add_(group['eps'])
118 |                 else:
119 |                     denom = exp_avg_sq.sqrt().add_(group['eps'])
120 | 
121 |                 p.data.addcdiv_(-2.0 * step_size, exp_avg, denom)
122 | 
123 |         return loss
124 | 
-------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/rbflayer.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT License. 
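
# A minimal usage sketch of the layer defined below, assuming a 3-dimensional
# input and the gaussian basis function from this file; the layer computes
# u_i = basis_func(||x - c_i|| * |s_i|) for each of the out_features centres:
#
#   layer = RBF(in_features=3, out_features=8, basis_func=gaussian)
#   out = layer(torch.randn(16, 3))   # -> shape (16, 8)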
3 | 4 | """ 5 | Forked from the repository: 6 | https://github.com/JeremyLinux/PyTorch-Radial-Basis-Function-Layer 7 | By Hamish Flynn on April 15, 2020 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | 12 | # RBF Layer 13 | 14 | 15 | class RBF(nn.Module): 16 | """ 17 | Transforms incoming data using a given radial basis function: 18 | u_{i} = rbf(||x - c_{i}|| / s_{i}) 19 | 20 | Arguments: 21 | in_features: size of each input sample 22 | out_features: size of each output sample 23 | 24 | Shape: 25 | - Input: (N, in_features) where N is an arbitrary batch size 26 | - Output: (N, out_features) where N is an arbitrary batch size 27 | 28 | Attributes: 29 | centres: the learnable centres of shape (out_features, in_features). 30 | The values are initialised from a standard normal distribution. 31 | Normalising inputs to have mean 0 and standard deviation 1 is 32 | recommended. 33 | 34 | sigmas: the learnable scaling factors of shape (out_features). 35 | The values are initialised as ones. 36 | 37 | basis_func: the radial basis function used to transform the scaled 38 | distances. 39 | """ 40 | 41 | def __init__(self, in_features, out_features, basis_func, centres=None, sigmas=None, 42 | trainable=True): 43 | super(RBF, self).__init__() 44 | self.in_features = in_features 45 | self.out_features = out_features 46 | self.centres = nn.Parameter( 47 | torch.Tensor(out_features, in_features)) 48 | self.centres.requires_grad = trainable 49 | self.sigmas = nn.Parameter(torch.Tensor(1, out_features)) 50 | self.sigmas.requires_grad = trainable 51 | self.basis_func = basis_func 52 | self.pd = nn.PairwiseDistance() 53 | self.init_centres = centres 54 | self.init_sigmas = sigmas 55 | self.reset_parameters() 56 | 57 | def reset_parameters(self): 58 | if self.init_centres is not None: 59 | self.centres.data = torch.Tensor( 60 | self.init_centres).to(self.centres.device) 61 | else: 62 | nn.init.normal_(self.centres, 0, 1) 63 | if self.init_sigmas is not None: 64 | self.sigmas.data = torch.Tensor( 65 | self.init_sigmas).to(self.sigmas.device).T 66 | else: 67 | nn.init.constant_(self.sigmas, 1) 68 | 69 | def forward(self, input): 70 | distances = torch.cdist(input, self.centres) * torch.abs(self.sigmas) 71 | return self.basis_func(distances) 72 | 73 | 74 | # RBFs 75 | 76 | def gaussian(alpha): 77 | phi = torch.exp(-1 * alpha.pow(2)) 78 | return phi 79 | 80 | 81 | def linear(alpha): 82 | phi = alpha 83 | return phi 84 | 85 | 86 | def quadratic(alpha): 87 | phi = alpha.pow(2) 88 | return phi 89 | 90 | 91 | def inverse_quadratic(alpha): 92 | phi = torch.ones_like(alpha) / (torch.ones_like(alpha) + alpha.pow(2)) 93 | return phi 94 | 95 | 96 | def multiquadric(alpha): 97 | phi = (torch.ones_like(alpha) + alpha.pow(2)).pow(0.5) 98 | return phi 99 | 100 | 101 | def inverse_multiquadric(alpha): 102 | phi = torch.ones_like( 103 | alpha) / (torch.ones_like(alpha) + alpha.pow(2)).pow(0.5) 104 | return phi 105 | 106 | 107 | def spline(alpha): 108 | phi = (alpha.pow(2) * torch.log(alpha + torch.ones_like(alpha))) 109 | return phi 110 | 111 | 112 | def poisson_one(alpha): 113 | phi = (alpha - torch.ones_like(alpha)) * torch.exp(-alpha) 114 | return phi 115 | 116 | 117 | def poisson_two(alpha): 118 | phi = ((alpha - 2 * torch.ones_like(alpha)) / 2 * torch.ones_like(alpha)) \ 119 | * alpha * torch.exp(-alpha) 120 | return phi 121 | 122 | 123 | def matern32(alpha): 124 | phi = (torch.ones_like(alpha) + 3**0.5 * alpha) * \ 125 | torch.exp(-3**0.5 * alpha) 126 | return phi 127 | 128 | 129 | def matern52(alpha): 130 | phi = 
(torch.ones_like(alpha) + 5**0.5 * alpha + (5 / 3) 131 | * alpha.pow(2)) * torch.exp(-5**0.5 * alpha) 132 | return phi 133 | 134 | 135 | def basis_func_dict(): 136 | """ 137 | A helper function that returns a dictionary containing each RBF 138 | """ 139 | 140 | bases = {'gaussian': gaussian, 141 | 'linear': linear, 142 | 'quadratic': quadratic, 143 | 'inverse quadratic': inverse_quadratic, 144 | 'multiquadric': multiquadric, 145 | 'inverse multiquadric': inverse_multiquadric, 146 | 'spline': spline, 147 | 'poisson one': poisson_one, 148 | 'poisson two': poisson_two, 149 | 'matern32': matern32, 150 | 'matern52': matern52} 151 | return bases 152 | -------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import numpy as np 4 | from .net import AGMM_Net 5 | from mliv.utils import set_seed, cat 6 | 7 | example = ''' 8 | from mliv.inference import AGMM 9 | 10 | model = AGMM() 11 | model.fit(data) 12 | ITE = model.predict(data.train) 13 | ATE,_ = model.ATE(data.train) 14 | ''' 15 | 16 | class AGMM(object): 17 | def __init__(self) -> None: 18 | self.config = { 19 | 'methodName': 'AGMM', 20 | 'dropout': 0.1, 21 | 'n_hidden': 100, 22 | 'g_features': 100, 23 | 'learner_lr': 1e-4, 24 | 'adversary_lr': 1e-4, 25 | 'learner_l2': 1e-3, 26 | 'adversary_l2': 1e-4, 27 | 'adversary_norm_reg': 1e-3, 28 | 'epochs': 100, 29 | 'batch_size': 100, 30 | 'sigma': 2.0, 31 | 'n_centers': 100, 32 | 'device': 'cuda:0', 33 | 'mode': 'final', 34 | 'resultDir': './Results/tmp/', 35 | 'seed': 2022, 36 | } 37 | 38 | def set_Configuration(self, config): 39 | self.config = config 40 | 41 | def fit(self, data, exp=-1, config=None): 42 | if config is None: 43 | config = self.config 44 | 45 | device = config['device'] 46 | p = config['dropout'] 47 | n_hidden = config['n_hidden'] 48 | g_features = config['g_features'] 49 | learner_lr = config['learner_lr'] 50 | adversary_lr = config['adversary_lr'] 51 | learner_l2 = config['learner_l2'] 52 | adversary_l2 = config['adversary_l2'] 53 | adversary_norm_reg = config['adversary_norm_reg'] 54 | epochs = config['epochs'] 55 | bs = config['batch_size'] 56 | sigma = config['sigma'] / g_features 57 | n_centers = config['n_centers'] 58 | resultDir = config['resultDir'] 59 | self.mode = config['mode'] 60 | 61 | set_seed(config['seed']) 62 | data.numpy() 63 | 64 | self.z_dim = data.train.z.shape[1] 65 | self.x_dim = data.train.x.shape[1] 66 | self.t_dim = data.train.t.shape[1] 67 | 68 | learner = nn.Sequential(nn.Dropout(p=p), nn.Linear(self.t_dim+self.x_dim, n_hidden), nn.LeakyReLU(), 69 | nn.Dropout(p=p), nn.Linear(n_hidden, n_hidden), nn.ReLU(), 70 | nn.Dropout(p=p), nn.Linear(n_hidden, 1)) 71 | 72 | adversary_fn = nn.Sequential(nn.Dropout(p=p), nn.Linear(self.z_dim+self.x_dim, n_hidden), nn.LeakyReLU(), 73 | nn.Dropout(p=p), nn.Linear(n_hidden, n_hidden), nn.ReLU(), 74 | nn.Dropout(p=p), nn.Linear(n_hidden, 1)) 75 | 76 | 77 | Z_train, T_train, Y_train, G_train = map(lambda x: torch.Tensor(x).to(device), (np.concatenate([data.train.z, data.train.x],1), np.concatenate([data.train.t, data.train.x],1), data.train.y, data.train.g)) 78 | Z_val, T_val, Y_val, G_val = map(lambda x: torch.Tensor(x).to(device), (np.concatenate([data.valid.z, data.valid.x],1), np.concatenate([data.valid.t, data.valid.x],1), data.valid.y, data.valid.g)) 79 | T_test_tens = torch.Tensor(np.concatenate([data.test.t, 
data.test.x],1)).to(device) 80 | G_test_tens = torch.Tensor(data.test.g).to(device) 81 | 82 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 83 | 84 | agmm = AGMM_Net(learner, adversary_fn).fit(Z_train, T_train, Y_train, Z_val, T_val, Y_val, T_test_tens, G_val, 85 | learner_lr=learner_lr, adversary_lr=adversary_lr, 86 | learner_l2=learner_l2, adversary_l2=adversary_l2, 87 | n_epochs=epochs, bs=bs, 88 | results_dir=resultDir, device=device, verbose=0) 89 | 90 | print('End. ' + '-'*20) 91 | 92 | def estimation(data): 93 | input0 = torch.Tensor(np.concatenate([data.t-data.t, data.x],1)).to(device) 94 | point0 = agmm.predict(input0, model=self.mode) 95 | 96 | inputt = torch.Tensor(np.concatenate([data.t, data.x],1)).to(device) 97 | pointt = agmm.predict(inputt, model=self.mode) 98 | 99 | return point0, pointt 100 | 101 | self.estimation = estimation 102 | self.device = device 103 | self.agmm = agmm 104 | 105 | def predict(self, data=None, t=None, x=None): 106 | if data is None: 107 | data = self.data.test 108 | 109 | if x is None: 110 | x = data.x 111 | 112 | if t is None: 113 | t = data.t 114 | 115 | pred = self.agmm.predict(torch.Tensor(np.concatenate([t, x],1)).to(self.device), model=self.mode) 116 | 117 | return pred 118 | 119 | def ITE(self, data=None, t=None, x=None): 120 | if data is None: 121 | data = self.data.test 122 | 123 | if x is None: 124 | x = data.x 125 | 126 | if t is None: 127 | t = data.t 128 | 129 | ITE_0 = self.agmm.predict(torch.Tensor(np.concatenate([t-t, x],1)).to(self.device), model=self.mode) 130 | ITE_1 = self.agmm.predict(torch.Tensor(np.concatenate([t-t+1, x],1)).to(self.device), model=self.mode) 131 | ITE_t = self.agmm.predict(torch.Tensor(np.concatenate([t, x],1)).to(self.device), model=self.mode) 132 | 133 | return ITE_0,ITE_1,ITE_t 134 | 135 | def ATE(self, data=None, t=None, x=None): 136 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 137 | 138 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 139 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainer import DeepGMM 2 | 3 | example = ''' 4 | from mliv.inference import DeepGMM 5 | 6 | model = DeepGMM() 7 | model.fit(data) 8 | ITE = model.predict(data.train) 9 | ATE,_ = model.ATE(data.train) 10 | ''' -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/dataclass.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple, Optional 2 | import numpy as np 3 | import torch 4 | 5 | def load_TrainDataSet(data): 6 | try: 7 | x = data.x 8 | except: 9 | x = None 10 | 11 | train_data = TrainDataSet(treatment=data.t, 12 | instrumental=data.z, 13 | covariate=x, 14 | outcome=data.y, 15 | structural=data.g) 16 | 17 | return train_data 18 | 19 | def load_TestDataSet(data): 20 | try: 21 | z = data.z 22 | except: 23 | z = None 24 | 25 | try: 26 | x = data.x 27 | except: 28 | x = None 29 | 30 | try: 31 | y = data.y 32 | except: 33 | y = None 34 | 35 | test_data = TestDataSet(treatment=data.t, 36 | instrumental=z, 37 | covariate=x, 38 | outcome=y, 39 | structural=data.g) 40 | 41 | return test_data 42 | 43 | class TrainDataSet(NamedTuple): 44 | treatment: np.ndarray 45 | instrumental: np.ndarray 46 | covariate: Optional[np.ndarray] 47 | outcome: np.ndarray 48 | structural: np.ndarray 49 | 50 | class TestDataSet(NamedTuple): 
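    """Test-split container; covariate, instrumental and outcome are optional and may be None."""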
51 | treatment: np.ndarray 52 | covariate: Optional[np.ndarray] 53 | structural: np.ndarray 54 | instrumental: Optional[np.ndarray] 55 | outcome: Optional[np.ndarray] 56 | 57 | class TrainDataSetTorch(NamedTuple): 58 | treatment: torch.Tensor 59 | instrumental: torch.Tensor 60 | covariate: torch.Tensor 61 | outcome: torch.Tensor 62 | structural: torch.Tensor 63 | 64 | @classmethod 65 | def from_numpy(cls, train_data: TrainDataSet): 66 | covariate = None 67 | if train_data.covariate is not None: 68 | covariate = torch.tensor(train_data.covariate, dtype=torch.float32) 69 | return TrainDataSetTorch(treatment=torch.tensor(train_data.treatment, dtype=torch.float32), 70 | instrumental=torch.tensor(train_data.instrumental, dtype=torch.float32), 71 | covariate=covariate, 72 | outcome=torch.tensor(train_data.outcome, dtype=torch.float32), 73 | structural=torch.tensor(train_data.structural, dtype=torch.float32)) 74 | 75 | def to(self, device): 76 | covariate = None 77 | if self.covariate is not None: 78 | covariate = self.covariate.to(device) 79 | return TrainDataSetTorch(treatment=self.treatment.to(device), 80 | instrumental=self.instrumental.to(device), 81 | covariate=covariate, 82 | outcome=self.outcome.to(device), 83 | structural=self.structural.to(device)) 84 | 85 | 86 | class TestDataSetTorch(NamedTuple): 87 | treatment: torch.Tensor 88 | instrumental: torch.Tensor 89 | covariate: torch.Tensor 90 | outcome: torch.Tensor 91 | structural: torch.Tensor 92 | 93 | @classmethod 94 | def from_numpy(cls, test_data: TestDataSet): 95 | covariate = None 96 | instrumental = None 97 | outcome = None 98 | if test_data.covariate is not None: 99 | covariate = torch.tensor(test_data.covariate, dtype=torch.float32) 100 | if test_data.instrumental is not None: 101 | instrumental = torch.tensor(test_data.instrumental, dtype=torch.float32) 102 | if test_data.outcome is not None: 103 | outcome = torch.tensor(test_data.outcome, dtype=torch.float32) 104 | return TestDataSetTorch(treatment=torch.tensor(test_data.treatment, dtype=torch.float32), 105 | covariate=covariate, 106 | instrumental=instrumental, 107 | outcome=outcome, 108 | structural=torch.tensor(test_data.structural, dtype=torch.float32)) 109 | def to(self, device): 110 | covariate = None 111 | instrumental = None 112 | outcome = None 113 | if self.covariate is not None: 114 | covariate = self.covariate.to(device) 115 | if self.instrumental is not None: 116 | instrumental = self.instrumental.to(device) 117 | if self.outcome is not None: 118 | outcome = self.outcome.to(device) 119 | return TestDataSetTorch(treatment=self.treatment.to(device), 120 | covariate=covariate, 121 | instrumental=instrumental, 122 | outcome=outcome, 123 | structural=self.structural.to(device)) -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/model.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import torch 3 | from torch import nn 4 | import numpy as np 5 | import logging 6 | 7 | from .utils.pytorch_linear_reg_utils import fit_linear, linear_reg_pred, outer_prod, add_const_col 8 | from .dataclass import TrainDataSet, TestDataSet, TrainDataSetTorch, TestDataSetTorch 9 | 10 | logger = logging.getLogger() 11 | 12 | 13 | class DeepGMMModel: 14 | 15 | def __init__(self, 16 | primal_net: nn.Module, 17 | dual_net: nn.Module 18 | ): 19 | self.primal_net = primal_net 20 | self.dual_net = dual_net 21 | 22 | def predict_t(self, treatment: torch.Tensor): 
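        """Run the primal (response) network on a treatment tensor with the network in eval mode."""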
23 | self.primal_net.train(False) 24 | return self.primal_net(treatment) 25 | 26 | def predict(self, treatment: np.ndarray): 27 | treatment_t = torch.tensor(treatment, dtype=torch.float32) 28 | return self.predict_t(treatment_t).data.numpy() 29 | 30 | def evaluate_t(self, test_data: TestDataSetTorch): 31 | target = test_data.structural 32 | with torch.no_grad(): 33 | pred = self.predict_t(test_data.treatment) 34 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 35 | 36 | def evaluate(self, test_data: TestDataSet): 37 | return self.evaluate_t(TestDataSetTorch.from_numpy(test_data)).data.item() 38 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torch.nn.utils import spectral_norm 7 | 8 | 9 | from .nn_structure_for_demand_old import build_net_for_demand_old 10 | from .nn_structure_for_sin import build_net_for_sin 11 | from .nn_structure_for_dsprite import build_net_for_dsprite 12 | from .nn_structure_for_demand_image import build_net_for_demand_image 13 | 14 | import logging 15 | 16 | logger = logging.getLogger() 17 | 18 | 19 | def build_extractor(data_name: str) -> Tuple[nn.Module, nn.Module]: 20 | if data_name == "demand_old": 21 | logger.info("build old model without image") 22 | return build_net_for_demand_old() 23 | elif data_name == "sin": 24 | return build_net_for_sin() 25 | elif data_name == "dsprite": 26 | return build_net_for_dsprite() 27 | elif data_name == "demand_image": 28 | return build_net_for_demand_image() 29 | else: 30 | raise ValueError(f"data name {data_name} is not valid") 31 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/nn_structure_for_demand_image.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from torch.nn.utils import spectral_norm 5 | 6 | 7 | class ImageFeature(nn.Module): 8 | 9 | def __init__(self, num_dense_feature: int): 10 | super(ImageFeature, self).__init__() 11 | self.num_dense_feature = num_dense_feature 12 | self.conv1 = spectral_norm(nn.Conv2d(1, 64, 3)) 13 | self.conv2 = spectral_norm(nn.Conv2d(64, 64, 3)) 14 | self.maxpool = nn.MaxPool2d(2) 15 | self.dropout1 = nn.Dropout(0.1) 16 | self.dropout2 = nn.Dropout(0.1) 17 | self.batch = nn.BatchNorm1d(9216) 18 | self.linear1 = nn.Linear(9216, 128) 19 | self.linear2 = nn.Linear(128, 64) 20 | 21 | def forward(self, data): 22 | dense = data[:, :self.num_dense_feature] 23 | image = data[:, self.num_dense_feature:] 24 | image = image.reshape((-1, 1, 28, 28)) 25 | image_feature = F.relu(self.conv1(image)) 26 | image_feature = self.maxpool(F.relu(self.conv2(image_feature))) 27 | image_feature = torch.flatten(image_feature, start_dim=1) 28 | image_feature = self.dropout1(image_feature) 29 | image_feature = self.dropout2(F.relu(self.linear1(image_feature))) 30 | image_feature = self.linear2(image_feature) 31 | return torch.cat([dense, image_feature], dim=1) 32 | 33 | 34 | class LimitCol(nn.Module): 35 | 36 | def __init__(self, ndim: int): 37 | super(LimitCol, self).__init__() 38 | self.ndim = ndim 39 | 40 | def forward(self, data): 41 | return data[:, :self.ndim] 42 | 43 | 44 | def 
build_net_for_demand_image(): 45 | 46 | dual_net = nn.Sequential(ImageFeature(2), 47 | nn.Linear(66, 32), 48 | nn.BatchNorm1d(32), 49 | nn.ReLU(), 50 | nn.Linear(32, 1)) 51 | 52 | primal_net = nn.Sequential(ImageFeature(2), 53 | nn.Linear(66, 32), 54 | nn.BatchNorm1d(32), 55 | nn.ReLU(), 56 | nn.Linear(32, 1)) 57 | 58 | return primal_net, dual_net 59 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/nn_structure_for_demand_old.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from typing import Tuple 4 | 5 | 6 | def build_net_for_demand_old() -> Tuple[nn.Module, nn.Module]: 7 | response_net = nn.Sequential(nn.Linear(3, 128), 8 | nn.ReLU(), 9 | nn.Linear(128, 64), 10 | nn.ReLU(), 11 | nn.Linear(64, 32), 12 | nn.Tanh(), 13 | nn.Linear(32, 1)) 14 | 15 | dual_net = nn.Sequential(nn.Linear(3, 128), 16 | nn.ReLU(), 17 | nn.Linear(128, 64), 18 | nn.ReLU(), 19 | nn.Linear(64, 1)) 20 | 21 | return response_net, dual_net 22 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/nn_structure_for_dsprite.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from torch.nn.utils import spectral_norm 5 | 6 | 7 | class View(nn.Module): 8 | def __init__(self, size): 9 | super(View, self).__init__() 10 | self.size = size 11 | 12 | def forward(self, tensor): 13 | return tensor.view(self.size) 14 | 15 | 16 | class ImageFeature(nn.Module): 17 | 18 | def __init__(self): 19 | super(ImageFeature, self).__init__() 20 | self.treatment_net = nn.Sequential( 21 | nn.Conv2d(1, 32, 4, 2, 1), # B, 32, 32, 32 22 | nn.ReLU(True), 23 | nn.Conv2d(32, 32, 4, 2, 1), # B, 32, 16, 16 24 | nn.ReLU(True), 25 | nn.Conv2d(32, 32, 4, 2, 1), # B, 32, 8, 8 26 | nn.ReLU(True), 27 | nn.Conv2d(32, 32, 4, 2, 1), # B, 32, 4, 4 28 | nn.ReLU(True), 29 | View((-1, 32 * 4 * 4)), # B, 512 30 | nn.BatchNorm1d(32 * 4 * 4), 31 | nn.Linear(32 * 4 * 4, 256), # B, 256 32 | nn.ReLU(True), 33 | nn.Linear(256, 128), # B, 256 34 | nn.ReLU(True), 35 | nn.Linear(128, 32), # B, z_dim*2 36 | nn.BatchNorm1d(32), 37 | nn.ReLU() 38 | ) 39 | 40 | def forward(self, data): 41 | image = data.reshape((-1, 1, 64, 64)) 42 | return self.treatment_net(image) 43 | 44 | 45 | def build_net_for_dsprite(): 46 | primal_net = nn.Sequential(spectral_norm(nn.Linear(64 * 64, 1024)), 47 | nn.ReLU(), 48 | spectral_norm(nn.Linear(1024, 512)), 49 | nn.ReLU(), 50 | nn.BatchNorm1d(512), 51 | spectral_norm(nn.Linear(512, 128)), 52 | nn.ReLU(), 53 | spectral_norm(nn.Linear(128, 32)), 54 | nn.BatchNorm1d(32), 55 | nn.Tanh(), 56 | nn.Linear(32, 1)) 57 | 58 | # treatment_net = ImageFeature() 59 | dual_net = nn.Sequential(spectral_norm(nn.Linear(3, 256)), 60 | nn.ReLU(), 61 | spectral_norm(nn.Linear(256, 128)), 62 | nn.ReLU(), 63 | nn.BatchNorm1d(128), 64 | spectral_norm(nn.Linear(128, 128)), 65 | nn.ReLU(), 66 | nn.BatchNorm1d(128), 67 | spectral_norm(nn.Linear(128, 32)), 68 | nn.BatchNorm1d(32), 69 | nn.ReLU(), 70 | nn.Linear(32, 1)) 71 | 72 | return primal_net, dual_net 73 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/nn_structure_for_sin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 
from typing import Tuple
4 | 
5 | 
6 | def build_net_for_sin() -> Tuple[nn.Module, nn.Module]:
7 |     response_net = nn.Sequential(nn.Linear(1, 20),
8 |                                  nn.LeakyReLU(),
9 |                                  nn.Linear(20, 3),
10 |                                  nn.LeakyReLU(),
11 |                                  nn.Linear(3, 1))
12 | 
13 |     dual_net = nn.Sequential(nn.Linear(2, 20),
14 |                              nn.LeakyReLU(),
15 |                              nn.Linear(20, 1))
16 | 
17 |     return response_net, dual_net
18 | 
--------------------------------------------------------------------------------
/mliv/inference/gmm/deepgmm_v1/trainer.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, Optional, List
2 | import torch
3 | from torch import nn
4 | import numpy as np
5 | 
6 | from .model import DeepGMMModel
7 | from .dataclass import TrainDataSet, TrainDataSetTorch, TestDataSetTorch, TestDataSet
8 | from mliv.utils import set_seed, cat
9 | 
10 | example = '''
11 | from mliv.inference import DeepGMM
12 | 
13 | model = DeepGMM()
14 | model.fit(data)
15 | ITE = model.predict(data.train)
16 | ATE,_ = model.ATE(data.train)
17 | '''
18 | 
19 | def build_net_for_demand(z_dim, x_dim, t_dim):
20 |     response_net = nn.Sequential(nn.Linear(t_dim + x_dim, 128),  # the response (primal) net consumes cat([t, x])
21 |                                  nn.ReLU(),
22 |                                  nn.Linear(128, 64),
23 |                                  nn.ReLU(),
24 |                                  nn.Linear(64, 32),
25 |                                  nn.Tanh(),
26 |                                  nn.Linear(32, 1))
27 | 
28 |     dual_net = nn.Sequential(nn.Linear(z_dim + x_dim, 128),  # the dual net consumes cat([z, x])
29 |                              nn.ReLU(),
30 |                              nn.Linear(128, 64),
31 |                              nn.ReLU(),
32 |                              nn.Linear(64, 1))
33 | 
34 |     return response_net, dual_net
35 | 
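A note on what the trainer below optimizes: DeepGMM plays a zero-sum game between the response network g and the dual network f. With residual eps = y - g(t), `dual_update` ascends E[f(z)*eps] - 0.25*E[(f(z)*eps)^2] in f while g is frozen, and `primal_update` descends E[f(z)*eps] in g while f is frozen. A minimal sketch of one such round on toy tensors; the linear nets and the synthetic z, t, y are illustrative assumptions, not part of the package, while the learning rates and betas match the optimizers constructed below:

import torch
from torch import nn

torch.manual_seed(0)
z = torch.randn(256, 1)                  # toy instrument
t = z + 0.5 * torch.randn(256, 1)        # toy treatment, correlated with z
y = 2.0 * t + torch.randn(256, 1)        # toy outcome

g_net = nn.Linear(1, 1)                  # stands in for the response (primal) net
f_net = nn.Linear(1, 1)                  # stands in for the dual net
g_opt = torch.optim.Adam(g_net.parameters(), lr=0.0005, betas=(0.5, 0.9))
f_opt = torch.optim.Adam(f_net.parameters(), lr=0.0025, betas=(0.5, 0.9))

# dual step: maximize the regularized moment, residuals held fixed
with torch.no_grad():
    eps = y - g_net(t)
f_opt.zero_grad()
moment = torch.mean(f_net(z) * eps)
reg = 0.25 * torch.mean((f_net(z) * eps) ** 2)
(-moment + reg).backward()
f_opt.step()

# primal step: minimize the moment over g, dual values held fixed
with torch.no_grad():
    dual = f_net(z)
g_opt.zero_grad()
torch.mean(dual * (y - g_net(t))).backward()
g_opt.step()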
36 | class DeepGMMTrainer(object):
37 | 
38 |     def __init__(self, data_list: List, net_list: List, train_params: Dict[str, Any],
39 |                  device: str = 'cpu'):
40 |         self.data_list = data_list
41 |         self.device = device if torch.cuda.is_available() else 'cpu'
42 | 
43 |         # configure training params
44 |         self.dual_iter: int = train_params["dual_iter"]
45 |         self.primal_iter: int = train_params["primal_iter"]
46 |         self.epochs: int = train_params["epochs"]
47 | 
48 |         # build networks
49 |         networks = net_list
50 |         self.primal_net: nn.Module = networks[0]
51 |         self.dual_net: nn.Module = networks[1]
52 |         self.primal_weight_decay = train_params["primal_weight_decay"]
53 |         self.dual_weight_decay = train_params["dual_weight_decay"]
54 | 
55 |         self.primal_net.to(self.device)
56 |         self.dual_net.to(self.device)
57 | 
58 |         self.primal_opt = torch.optim.Adam(self.primal_net.parameters(),
59 |                                            weight_decay=self.primal_weight_decay,
60 |                                            lr=0.0005, betas=(0.5, 0.9))
61 |         self.dual_opt = torch.optim.Adam(self.dual_net.parameters(),
62 |                                          weight_decay=self.dual_weight_decay,
63 |                                          lr=0.0025, betas=(0.5, 0.9))
64 | 
65 |         # build monitor
66 |         self.monitor = None
67 | 
68 |     def train(self, rand_seed: int = 42, verbose: int = 0, epoch_show: int = 20) -> float:
69 |         """
70 | 
71 |         Parameters
72 |         ----------
73 |         rand_seed: int
74 |             random seed
75 |         verbose : int
76 |             Determines the level of logging.
77 |         Returns
78 |         -------
79 |         oos_result : float
80 |             Out-of-sample loss of the trained model.
81 |         """
82 |         train_data = self.data_list[0]
83 |         test_data = self.data_list[2]
84 |         if train_data.covariate is not None:
85 |             train_data = TrainDataSet(treatment=np.concatenate([train_data.treatment, train_data.covariate], axis=1),
86 |                                       structural=train_data.structural,
87 |                                       covariate=None,
88 |                                       instrumental=train_data.instrumental,
89 |                                       outcome=train_data.outcome)
90 |             test_data = TestDataSet(treatment=np.concatenate([test_data.treatment, test_data.covariate], axis=1),
91 |                                     covariate=None,
92 |                                     structural=test_data.structural)
93 | 
94 |         train_data_t = TrainDataSetTorch.from_numpy(train_data)
95 |         test_data_t = TestDataSetTorch.from_numpy(test_data)
96 | 
97 |         train_data_t = train_data_t.to(self.device)
98 |         test_data_t = test_data_t.to(self.device)
99 | 
100 |         for t in range(self.epochs):
101 |             self.dual_update(train_data_t, verbose)
102 |             self.primal_update(train_data_t, verbose)
103 |             if t % epoch_show == 0 or t == self.epochs - 1:
104 |                 print(f"Epoch {t} ended")
105 |             if verbose >= 1:
106 |                 mdl = DeepGMMModel(self.primal_net, self.dual_net)
107 |                 print(f"test error {mdl.evaluate_t(test_data_t).data.item()}")
108 | 
109 | 
110 |         mdl = DeepGMMModel(self.primal_net, self.dual_net)
111 |         oos_loss: float = mdl.evaluate_t(test_data_t).data.item()
112 |         print(f"test_loss:{oos_loss}")
113 |         return oos_loss
114 | 
115 |     def dual_update(self, train_data_t: TrainDataSetTorch, verbose: int):
116 |         self.dual_net.train(True)
117 |         self.primal_net.train(False)
118 |         with torch.no_grad():
119 |             epsilon = train_data_t.outcome - self.primal_net(train_data_t.treatment)
120 |         for t in range(self.dual_iter):
121 |             self.dual_opt.zero_grad()
122 |             moment = torch.mean(self.dual_net(train_data_t.instrumental) * epsilon)
123 |             reg = 0.25 * torch.mean((self.dual_net(train_data_t.instrumental) * epsilon) ** 2)
124 |             loss = -moment + reg
125 |             if verbose >= 2:
126 |                 print(f"dual loss:{loss.data.item()}")
127 |             loss.backward()
128 |             self.dual_opt.step()
129 | 
130 |     def primal_update(self, train_data_t: TrainDataSetTorch, verbose: int):
131 |         self.dual_net.train(False)
132 |         self.primal_net.train(True)
133 |         with torch.no_grad():
134 |             dual = self.dual_net(train_data_t.instrumental)
135 |         for t in range(self.primal_iter):
136 |             self.primal_opt.zero_grad()
137 |             epsilon = train_data_t.outcome - self.primal_net(train_data_t.treatment)
138 |             loss = torch.mean(dual * epsilon)
139 |             if verbose >= 2:
140 |                 print(f"primal loss:{loss.data.item()}")
141 |             loss.backward()
142 |             self.primal_opt.step()
143 | 
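Before the `DeepGMM` wrapper class packages these steps, a minimal sketch of driving `DeepGMMTrainer` directly, assuming the definitions in this file are in scope; the synthetic arrays and the choice x_dim=0 are illustrative assumptions, and the middle slot of data_list (a validation split that `train` does not consume) simply reuses the train split:

import numpy as np

n = 1000
z = np.random.randn(n, 2)                      # instruments
t = z[:, :1] + 0.5 * np.random.randn(n, 1)     # treatment
g = 2.0 * t                                    # true structural values
y = g + np.random.randn(n, 1)                  # observed outcome

train_set = TrainDataSet(treatment=t, instrumental=z, covariate=None, outcome=y, structural=g)
test_set = TestDataSet(treatment=t, covariate=None, structural=g, instrumental=None, outcome=None)

nets = build_net_for_demand(z_dim=2, x_dim=0, t_dim=1)
params = {"primal_iter": 1, "dual_iter": 5, "epochs": 100,
          "primal_weight_decay": 0.0, "dual_weight_decay": 0.0}
trainer = DeepGMMTrainer([train_set, train_set, test_set], list(nets), params, device='cpu')
oos_loss = trainer.train(verbose=0)            # mean squared error against the structural values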
144 | class DeepGMM(object):
145 |     def __init__(self) -> None:
146 |         self.config = {
147 |             'methodName': 'DeepGMM',
148 |             'resultDir': './Results/tmp/',
149 |             "primal_iter": 1,
150 |             "dual_iter": 5,
151 |             "epochs": 300,
152 |             "primal_weight_decay": 0.0,
153 |             "dual_weight_decay": 0.0,
154 |             'device': 'cuda:0',
155 |             'verbose': 1,
156 |             'epoch_show': 50,
157 |             'seed': 2022,
158 |         }
159 | 
160 |     def set_Configuration(self, config):
161 |         self.config = config
162 | 
163 |     def fit(self, data, exp=-1, config=None):
164 |         if config is None:
165 |             config = self.config
166 | 
167 |         set_seed(config['seed'])
168 |         data.numpy()
169 | 
170 |         self.z_dim = data.train.z.shape[1]
171 |         self.x_dim = data.train.x.shape[1]
172 |         self.t_dim = data.train.t.shape[1]
173 | 
174 |         response_net, dual_net = build_net_for_demand(self.z_dim, self.x_dim, self.t_dim)
175 |         net_list = [response_net, dual_net]
176 | 
177 |         train_data = TrainDataSet(treatment=np.concatenate([data.train.t, data.train.x],1),
178 |                                   instrumental=np.concatenate([data.train.z, data.train.x],1),
179 |                                   covariate=None,
180 |                                   outcome=data.train.y,
181 |                                   structural=data.train.g)
182 | 
183 |         val_data = TrainDataSet(treatment=np.concatenate([data.valid.t, data.valid.x],1),
184 |                                 instrumental=np.concatenate([data.valid.z, data.valid.x],1),
185 |                                 covariate=None,
186 |                                 outcome=data.valid.y,
187 |                                 structural=data.valid.g)
188 | 
189 |         test_data = TestDataSet(treatment=np.concatenate([data.test.t, data.test.x],1),
190 |                                 instrumental=np.concatenate([data.test.z, data.test.x],1),
191 |                                 covariate=None,
192 |                                 outcome=None,
193 |                                 structural=data.test.g)
194 | 
195 |         data_list = [train_data, val_data, test_data]
196 | 
197 |         train_config = {"primal_iter": config['primal_iter'],
198 |                         "dual_iter": config['dual_iter'],
199 |                         "epochs": config['epochs'],
200 |                         "primal_weight_decay": config['primal_weight_decay'],
201 |                         "dual_weight_decay": config['dual_weight_decay'],
202 |                         }
203 |         device = config['device']
204 | 
205 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
206 | 
207 |         trainer = DeepGMMTrainer(data_list, net_list, train_config, device)
208 |         test_loss = trainer.train(rand_seed=config['seed'], verbose=config['verbose'], epoch_show=config['epoch_show'])
209 | 
210 |         def estimation(data):
211 |             input0 = torch.Tensor(np.concatenate([data.t-data.t, data.x],1)).to(self.device)
212 |             point0 = response_net(input0).detach().cpu().numpy()
213 | 
214 |             inputt = torch.Tensor(np.concatenate([data.t, data.x],1)).to(self.device)
215 |             pointt = response_net(inputt).detach().cpu().numpy()
216 | 
217 |             return point0, pointt
218 | 
219 |         print('End. ' + '-'*20)
220 | 
221 |         self.estimation = estimation
222 |         self.response_net = response_net
223 |         self.dual_net = dual_net
224 |         self.device = device
225 |         self.data = data
226 | 
227 |     def predict(self, data=None, t=None, x=None):
228 |         if data is None:
229 |             data = self.data.test
230 | 
231 |         if x is None:
232 |             x = data.x
233 | 
234 |         if t is None:
235 |             t = data.t
236 | 
237 |         with torch.no_grad():
238 |             input = torch.Tensor(np.concatenate([t,x],1)).to(self.device)
239 |             pred = self.response_net(input).detach().cpu().numpy()
240 | 
241 |         return pred
242 | 
243 |     def ITE(self, data=None, t=None, x=None):
244 |         if data is None:
245 |             data = self.data.test
246 | 
247 |         if x is None:
248 |             x = data.x
249 | 
250 |         if t is None:
251 |             t = data.t
252 | 
253 |         input_0 = torch.Tensor(np.concatenate([t-t,x],1)).to(self.device)
254 |         input_1 = torch.Tensor(np.concatenate([t-t+1,x],1)).to(self.device)
255 |         input_t = torch.Tensor(np.concatenate([t,x],1)).to(self.device)
256 | 
257 |         ITE_0 = self.response_net(input_0).detach().cpu().numpy()
258 |         ITE_1 = self.response_net(input_1).detach().cpu().numpy()
259 |         ITE_t = self.response_net(input_t).detach().cpu().numpy()
260 | 
261 |         return ITE_0,ITE_1,ITE_t
262 | 
263 |     def ATE(self, data=None, t=None, x=None):
264 |         ITE_0,ITE_1,ITE_t = self.ITE(data,t,x)
265 | 
266 |         return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0)
--------------------------------------------------------------------------------
/mliv/inference/gmm/deepgmm_v1/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, Iterator, Tuple
2 | from itertools import product
3 | 
4 | 
5 | def grid_search_dict(org_params: Dict[str, Any]) -> Iterator[Tuple[str, Dict[str, Any]]]:
6 |     """
7 |     Iterate over list-valued entries in a dict to do grid search.
8 | 9 | Examples 10 | -------- 11 | >>> test_dict = dict(a=[1,2], b = [1,2,3], c = 4) 12 | >>> list(grid_search_dict(test_dict)) 13 | [('a:1-b:1', {'c': 4, 'a': 1, 'b': 1}), 14 | ('a:1-b:2', {'c': 4, 'a': 1, 'b': 2}), 15 | ('a:1-b:3', {'c': 4, 'a': 1, 'b': 3}), 16 | ('a:2-b:1', {'c': 4, 'a': 2, 'b': 1}), 17 | ('a:2-b:2', {'c': 4, 'a': 2, 'b': 2}), 18 | ('a:2-b:3', {'c': 4, 'a': 2, 'b': 3})] 19 | >>> test_dict = dict(a=1, b = 2, c = 3) 20 | >>> list(grid_search_dict(test_dict)) 21 | [('one', {'a': 1, 'b': 2, 'c': 3})] 22 | 23 | Parameters 24 | ---------- 25 | org_params : Dict 26 | Dictionary to be grid searched 27 | 28 | Yields 29 | ------ 30 | name : str 31 | Name that describes the parameter of the grid 32 | param: Dict[str, Any] 33 | Dictionary that contains the parameter at grid 34 | 35 | """ 36 | search_keys = [] 37 | non_search_keys = [] 38 | for key in org_params.keys(): 39 | if isinstance(org_params[key], list): 40 | search_keys.append(key) 41 | else: 42 | non_search_keys.append(key) 43 | if len(search_keys) == 0: 44 | yield "one", org_params 45 | else: 46 | param_generator = product(*[org_params[key] for key in search_keys]) 47 | for one_param_set in param_generator: 48 | one_dict = {k: org_params[k] for k in non_search_keys} 49 | tmp = dict(list(zip(search_keys, one_param_set))) 50 | one_dict.update(tmp) 51 | one_name = "-".join([k + ":" + str(tmp[k]) for k in search_keys]) 52 | yield one_name, one_dict 53 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/utils/custom_logging.py: -------------------------------------------------------------------------------- 1 | import json 2 | import shutil 3 | from typing import Union 4 | from pathlib import Path, PosixPath 5 | import requests 6 | import logging 7 | 8 | 9 | LOG_FORMAT = logging.Formatter( 10 | '%(name)s: %(asctime)s,%(msecs)d %(levelname)-4s [%(filename)s:%(lineno)d] %(message)s') 11 | 12 | logger = logging.getLogger() 13 | 14 | 15 | class SlackLoggingHandler(logging.StreamHandler): 16 | def __init__(self, webhook_url, stream=None): 17 | super(SlackLoggingHandler, self).__init__(stream) 18 | self.url = webhook_url 19 | 20 | def emit(self, record): 21 | message = super(SlackLoggingHandler, self).format(record) 22 | requests.post(self.url, json.dumps({'text': message})) 23 | 24 | 25 | def configure_logger(logger_name: str, 26 | log_format: str = LOG_FORMAT, 27 | log_dir: Union[str, Path, PosixPath, None] = None, 28 | webhook_url: Union[str, None] = None): 29 | # get root logger 30 | logger = logging.getLogger() 31 | logger.name = logger_name 32 | 33 | # slack post 34 | if webhook_url is not None: 35 | slack_handler = SlackLoggingHandler(webhook_url) 36 | slack_handler.setLevel(logging.ERROR) 37 | slack_handler.setFormatter(log_format) 38 | logger.addHandler(slack_handler) 39 | 40 | if log_dir is not None: 41 | log_dir = Path(log_dir) 42 | if log_dir.exists(): 43 | shutil.rmtree(log_dir) 44 | log_dir.mkdir(parents=True) 45 | log_filename = str(log_dir.joinpath('text_log.txt')) 46 | file_handler = logging.FileHandler(log_filename) 47 | file_handler.setLevel(logging.INFO) 48 | file_handler.setFormatter(log_format) 49 | logger.addHandler(file_handler) 50 | 51 | # stdout 52 | stream_handler = logging.StreamHandler() 53 | stream_handler.setLevel(logging.INFO) 54 | stream_handler.setFormatter(log_format) 55 | logger.addHandler(stream_handler) 56 | 57 | logger.setLevel(logging.INFO) 58 | -------------------------------------------------------------------------------- 
/mliv/inference/gmm/deepgmm_v1/utils/pytorch_linear_reg_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def fit_linear(target: torch.Tensor, 5 | feature: torch.Tensor, 6 | reg: float = 0.0): 7 | """ 8 | Parameters 9 | ---------- 10 | target: torch.Tensor[nBatch, dim1, dim2, ...] 11 | feature: torch.Tensor[nBatch, feature_dim] 12 | reg: float 13 | value of l2 regularizer 14 | Returns 15 | ------- 16 | weight: torch.Tensor[feature_dim, dim1, dim2, ...] 17 | weight of ridge linear regression. weight.size()[0] = feature_dim+1 if add_intercept is true 18 | """ 19 | assert feature.dim() == 2 20 | assert target.dim() >= 2 21 | nData, nDim = feature.size() 22 | A = torch.matmul(feature.t(), feature) 23 | device = feature.device 24 | A = A + reg * torch.eye(nDim, device=device) 25 | # U = torch.cholesky(A) 26 | # A_inv = torch.cholesky_inverse(U) 27 | #TODO use cholesky version in the latest pytorch 28 | A_inv = torch.inverse(A) 29 | if target.dim() == 2: 30 | b = torch.matmul(feature.t(), target) 31 | weight = torch.matmul(A_inv, b) 32 | else: 33 | b = torch.einsum("nd,n...->d...", feature, target) 34 | weight = torch.einsum("de,d...->e...", A_inv, b) 35 | return weight 36 | 37 | 38 | def linear_reg_pred(feature: torch.Tensor, weight: torch.Tensor): 39 | assert weight.dim() >= 2 40 | if weight.dim() == 2: 41 | return torch.matmul(feature, weight) 42 | else: 43 | return torch.einsum("nd,d...->n...", feature, weight) 44 | 45 | 46 | def linear_reg_loss(target: torch.Tensor, 47 | feature: torch.Tensor, 48 | reg: float): 49 | weight = fit_linear(target, feature, reg) 50 | pred = linear_reg_pred(feature, weight) 51 | return torch.norm((target - pred)) ** 2 + reg * torch.norm(weight) ** 2 52 | 53 | 54 | def outer_prod(mat1: torch.Tensor, mat2: torch.Tensor): 55 | """ 56 | Parameters 57 | ---------- 58 | mat1: torch.Tensor[nBatch, mat1_dim1, mat1_dim2, mat1_dim3, ...] 59 | mat2: torch.Tensor[nBatch, mat2_dim1, mat2_dim2, mat2_dim3, ...] 60 | 61 | Returns 62 | ------- 63 | res : torch.Tensor[nBatch, mat1_dim1, ..., mat2_dim1, ...] 64 | """ 65 | 66 | mat1_shape = tuple(mat1.size()) 67 | mat2_shape = tuple(mat2.size()) 68 | assert mat1_shape[0] == mat2_shape[0] 69 | nData = mat1_shape[0] 70 | aug_mat1_shape = mat1_shape + (1,) * (len(mat2_shape) - 1) 71 | aug_mat1 = torch.reshape(mat1, aug_mat1_shape) 72 | aug_mat2_shape = (nData,) + (1,) * (len(mat1_shape) - 1) + mat2_shape[1:] 73 | aug_mat2 = torch.reshape(mat2, aug_mat2_shape) 74 | return aug_mat1 * aug_mat2 75 | 76 | 77 | def add_const_col(mat: torch.Tensor): 78 | """ 79 | 80 | Parameters 81 | ---------- 82 | mat : torch.Tensor[n_data, n_col] 83 | 84 | Returns 85 | ------- 86 | res : torch.Tensor[n_data, n_col+1] 87 | add one column only contains 1. 
88 | 89 | """ 90 | assert mat.dim() == 2 91 | n_data = mat.size()[0] 92 | device = mat.device 93 | return torch.cat([mat, torch.ones((n_data, 1), device=device)], dim=1) 94 | -------------------------------------------------------------------------------- /mliv/inference/onestage/__init__.py: -------------------------------------------------------------------------------- 1 | from .onesiv_v1 import OneSIV -------------------------------------------------------------------------------- /mliv/inference/onestage/onesiv_v1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | from torch.utils.data import DataLoader 5 | from mliv.utils import set_seed, cat 6 | 7 | example = ''' 8 | from mliv.inference import OneSIV 9 | 10 | model = OneSIV() 11 | model.fit(data) 12 | ITE = model.predict(data.train) 13 | ATE,_ = model.ATE(data.train) 14 | ''' 15 | 16 | class Networks(nn.Module): 17 | def __init__(self, z_dim, x_dim, t_dim, dropout): 18 | super(Networks, self).__init__() 19 | 20 | t_input_dim, y_input_dim = z_dim+x_dim, t_dim+x_dim 21 | 22 | self.t_net = nn.Sequential(nn.Linear(t_input_dim, 1280), 23 | nn.ReLU(), 24 | nn.Dropout(dropout), 25 | nn.Linear(1280, 320), 26 | nn.ReLU(), 27 | nn.Dropout(dropout), 28 | nn.Linear(320, 32), 29 | nn.ReLU(), 30 | nn.Dropout(dropout), 31 | nn.Linear(32, t_dim)) 32 | 33 | self.y_net = nn.Sequential(nn.Linear(y_input_dim, 1280), 34 | nn.ReLU(), 35 | nn.Dropout(dropout), 36 | nn.Linear(1280, 320), 37 | nn.ReLU(), 38 | nn.Dropout(dropout), 39 | nn.Linear(320, 32), 40 | nn.ReLU(), 41 | nn.Dropout(dropout), 42 | nn.Linear(32, 1)) 43 | 44 | def forward(self, z, x): 45 | pred_t = self.t_net(cat([z,x])) 46 | yt_input = torch.cat((pred_t,x), 1) 47 | pred_yt = self.y_net(yt_input) 48 | 49 | return pred_t, pred_yt 50 | 51 | class OneSIV(object): 52 | def __init__(self) -> None: 53 | self.config = { 54 | 'methodName': 'OneSIV', 55 | 'device': 'cuda:0', 56 | 'learning_rate': 0.005, 57 | 'dropout': 0.5, 58 | 'beta1': 0.9, 59 | 'beta2': 0.999, 60 | 'eps': 1e-8, 61 | 'w1': 0.0017, 62 | 'w2': 1.0, 63 | 'epochs': 30, 64 | 'verbose': 1, 65 | 'show_per_epoch': 10, 66 | 'batch_size':1000, 67 | 'seed': 2022, 68 | } 69 | 70 | def set_Configuration(self, config): 71 | self.config = config 72 | 73 | def get_loader(self, data=None): 74 | if data is None: 75 | data = self.train 76 | loader = DataLoader(data, batch_size=self.batch_size) 77 | return loader 78 | 79 | def fit(self, data, exp=-1, config=None): 80 | if config is None: 81 | config = self.config 82 | 83 | self.z_dim = data.train.z.shape[1] 84 | self.x_dim = data.train.x.shape[1] 85 | self.t_dim = data.train.t.shape[1] 86 | 87 | self.device = config['device'] 88 | self.batch_size = config['batch_size'] 89 | 90 | set_seed(config['seed']) 91 | data.tensor() 92 | data.to(self.device) 93 | self.data = data 94 | 95 | OneSIV_dict = { 96 | 'z_dim':self.z_dim, 97 | 'x_dim':self.x_dim, 98 | 't_dim':self.t_dim, 99 | 'dropout':config['dropout'], 100 | } 101 | 102 | net = Networks(**OneSIV_dict) 103 | net.to(self.device) 104 | 105 | optimizer = torch.optim.Adam(net.parameters(), lr=config['learning_rate'], betas=(config['beta1'], config['beta2']),eps=config['eps']) 106 | t_loss = torch.nn.MSELoss() 107 | y_loss = torch.nn.MSELoss() 108 | 109 | print('Run {}-th experiment for {}. 
'.format(exp, config['methodName'])) 110 | 111 | train_loader = self.get_loader(data.train) 112 | 113 | def estimation(data): 114 | net.eval() 115 | return net.y_net(cat([data.t-data.t, data.x])), net.y_net(cat([data.t, data.x])) 116 | 117 | for epoch in range(config['epochs']): 118 | net.train() 119 | 120 | for idx, inputs in enumerate(train_loader): 121 | z = inputs['z'].to(self.device) 122 | x = inputs['x'].to(self.device) 123 | t = inputs['t'].to(self.device) 124 | y = inputs['y'].to(self.device) 125 | 126 | pred_t, pred_y = net(z,x) 127 | loss = config['w1'] * y_loss(pred_y, y) + config['w2'] * t_loss(pred_t, t) 128 | 129 | optimizer.zero_grad() 130 | loss.backward() 131 | optimizer.step() 132 | 133 | net.eval() 134 | if (config['verbose'] >= 1 and epoch % config['show_per_epoch'] == 0 ) or epoch == config['epochs']-1: 135 | _, pred_test_y = estimation(data.test) 136 | print(f'Epoch {epoch}: {y_loss(pred_test_y, data.test.y)}. ') 137 | 138 | print('End. ' + '-'*20) 139 | 140 | self.estimation = estimation 141 | self.y_net = net.y_net 142 | self.t_net = net.t_net 143 | 144 | def predict(self, data=None, t=None, x=None): 145 | if data is None: 146 | data = self.data.test 147 | 148 | if x is None: 149 | x = data.x 150 | 151 | if t is None: 152 | t = data.t 153 | 154 | return self.y_net(cat([t,x])).detach().cpu().numpy() 155 | 156 | def ITE(self, data=None, t=None, x=None): 157 | if data is None: 158 | data = self.data.test 159 | 160 | if x is None: 161 | x = data.x 162 | 163 | if t is None: 164 | t = data.t 165 | 166 | ITE_0 = self.y_net(cat([t-t,x])).detach().cpu().numpy() 167 | ITE_1 = self.y_net(cat([t-t+1,x])).detach().cpu().numpy() 168 | ITE_t = self.y_net(cat([t,x])).detach().cpu().numpy() 169 | 170 | return ITE_0,ITE_1,ITE_t 171 | 172 | def ATE(self, data=None, t=None, x=None): 173 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 174 | 175 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 176 | 177 | -------------------------------------------------------------------------------- /mliv/inference/sieve/__init__.py: -------------------------------------------------------------------------------- 1 | from .kerneliv_v1 import KernelIV 2 | from .dualiv_v1 import DualIV -------------------------------------------------------------------------------- /mliv/inference/sieve/dualiv_v1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import csv 3 | import cvxopt 4 | from mliv.utils import set_seed, cat 5 | 6 | example = ''' 7 | from mliv.inference import DualIV 8 | 9 | model = DualIV() 10 | model.fit(data) 11 | ITE = model.predict(data.train) 12 | ATE,_ = model.ATE(data.train) 13 | ''' 14 | 15 | def quadprog(H, f, L=None, k=None, Aeq=None, beq=None, lb=None, ub=None): 16 | """ 17 | Input: Numpy arrays, the format follows MATLAB quadprog function: https://www.mathworks.com/help/optim/ug/quadprog.html 18 | Output: Numpy array of the solution 19 | """ 20 | n_var = H.shape[1] 21 | 22 | P = cvxopt.matrix(H, tc='d') 23 | q = cvxopt.matrix(f, tc='d') 24 | 25 | if L is not None or k is not None: 26 | assert(k is not None and L is not None) 27 | if lb is not None: 28 | L = np.vstack([L, -np.eye(n_var)]) 29 | k = np.vstack([k, -lb]) 30 | 31 | if ub is not None: 32 | L = np.vstack([L, np.eye(n_var)]) 33 | k = np.vstack([k, ub]) 34 | 35 | L = cvxopt.matrix(L, tc='d') 36 | k = cvxopt.matrix(k, tc='d') 37 | 38 | if Aeq is not None or beq is not None: 39 | assert(Aeq is not None and beq is not None) 40 | Aeq = cvxopt.matrix(Aeq, tc='d') 41 | beq = 
cvxopt.matrix(beq, tc='d') 42 | 43 | sol = cvxopt.solvers.qp(P, q, L, k, Aeq, beq) 44 | 45 | return np.array(sol['x']) 46 | def median_inter(z): 47 | n = len(z) 48 | z = z.reshape(n, -1) 49 | A = np.repeat(z, repeats=n, axis=1) 50 | B = A.T 51 | dist = np.abs(A-B).reshape(-1,1) 52 | vz=np.median(dist) 53 | return vz 54 | 55 | def get_K_entry(x,z,v): 56 | return np.exp((np.linalg.norm(x-z)**2) / (-2 * (v **2))) 57 | 58 | def get_K_matrix(X1,X2,v): 59 | M = len(X1) 60 | N = len(X2) 61 | K_true = np.zeros((M,N)) 62 | 63 | for i in range(M): 64 | for j in range(N): 65 | K_true[i,j] = get_K_entry(X1[i:i+1,:].T, X2[j:j+1,:].T, v) 66 | return K_true 67 | 68 | def get_K_entry_2d(x,z,Vmat): 69 | return np.exp((x-z).T @ Vmat @ (x-z) /2) 70 | 71 | def get_K_matrix_2d(X1,X2,Vmat): 72 | M = len(X1) 73 | N = len(X2) 74 | K_true = np.zeros((M,N)) 75 | Vmat = np.linalg.inv(Vmat) 76 | 77 | for i in range(M): 78 | for j in range(N): 79 | K_true[i,j] = get_K_entry_2d(X1[i:i+1,:].T, X2[j:j+1,:].T, Vmat) 80 | 81 | return K_true 82 | 83 | def DualIV_trainer(x, y, z): 84 | N, x_dim = x.shape 85 | 86 | vx = [median_inter(x[:,i]) for i in range(x_dim)] 87 | vz = median_inter(z) 88 | vy = median_inter(y) 89 | 90 | K_xx = 1 91 | for i in range(x_dim): 92 | K_xx = K_xx * get_K_matrix(x[:,i:i+1], x[:,i:i+1], vx[i]) 93 | K_zz = get_K_matrix(z, z, vz) 94 | K_yy = get_K_matrix(y, y, vy) 95 | 96 | K = K_xx 97 | 98 | yz = np.concatenate([y,z],-1) 99 | vyz = 90000 100 | Vmat = np.array([[vy, vyz], [vyz, vz]]) 101 | L_yzyz = get_K_matrix_2d(yz, yz, Vmat) 102 | 103 | L = L_yzyz 104 | 105 | lambda1 = 0.001 106 | gamma = N * np.linalg.norm(L @ L, 2) / np.linalg.norm(K @ L, 2) ** 2 107 | A = L @ L + 1 / N * gamma * L @ (K @ K) @ L + lambda1 * np.eye(N) 108 | Ainv = np.linalg.inv(A) 109 | 110 | lambda2 = 0.001 111 | Q = 2 * K.T @ L.T @ Ainv @ L @ K + lambda2 * np.eye(N) 112 | R = - 2 * K.T @ L.T @ Ainv @ L @ y 113 | 114 | beta = quadprog(Q,R) 115 | 116 | return beta, vx 117 | 118 | class DualIV(object): 119 | def __init__(self) -> None: 120 | self.config = { 121 | 'methodName': 'DualIV', 122 | 'num': -1, 123 | 'seed': 2022, 124 | } 125 | 126 | def set_Configuration(self, config): 127 | self.config = config 128 | 129 | def fit(self, data, exp=-1, config=None): 130 | if config is None: 131 | config = self.config 132 | 133 | set_seed(config['seed']) 134 | data.numpy() 135 | 136 | num = config['num'] 137 | num = num if num > 0 else data.train.length 138 | 139 | x4train = cat([data.train.t[:num], data.train.x[:num]]) 140 | y4train = data.train.y[:num] 141 | z4train = data.train.z[:num] 142 | 143 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 144 | 145 | beta, vx = DualIV_trainer(x4train, y4train, z4train) 146 | 147 | def estimation(data): 148 | return backResult(x4train, cat([data.t-data.t, data.x]), beta, vx), backResult(x4train, cat([data.t, data.x]), beta, vx) 149 | 150 | print('End. 
' + '-'*20)
151 |         self.data = data
152 |         self.x4train = x4train
153 |         self.beta = beta
154 |         self.vx = vx
155 |         self.estimation = estimation
156 | 
157 |     def predict(self, data=None, t=None, x=None):
158 |         if data is None:
159 |             data = self.data.test
160 | 
161 |         if x is None:
162 |             x = data.x
163 | 
164 |         if t is None:
165 |             t = data.t
166 | 
167 |         return backResult(self.x4train, cat([t, x]), self.beta, self.vx)
168 | 
169 |     def ITE(self, data=None, t=None, x=None):
170 |         if data is None:
171 |             data = self.data.test
172 | 
173 |         if x is None:
174 |             x = data.x
175 | 
176 |         if t is None:
177 |             t = data.t
178 | 
179 |         ITE_0 = backResult(self.x4train, cat([t-t, x]), self.beta, self.vx)
180 |         ITE_1 = backResult(self.x4train, cat([t-t+1, x]), self.beta, self.vx)
181 |         ITE_t = backResult(self.x4train, cat([t, x]), self.beta, self.vx)
182 | 
183 |         return ITE_0,ITE_1,ITE_t
184 | 
185 |     def ATE(self, data=None, t=None, x=None):
186 |         ITE_0,ITE_1,ITE_t = self.ITE(data,t,x)
187 | 
188 |         return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0)
189 | 
190 | def backResult(x, x_vis, beta, vx):
191 |     x_dim = x.shape[1]
192 |     K_Xtest = 1
193 |     for i in range(x_dim):
194 |         K_Xtest = K_Xtest * get_K_matrix(x[:,i:i+1], x_vis[:,i:i+1], vx[i])
195 | 
196 |     y_vis_dual = K_Xtest.T @ beta
197 |     return y_vis_dual
198 | 
199 | 
--------------------------------------------------------------------------------
/mliv/inference/sieve/kerneliv_v1.py:
--------------------------------------------------------------------------------
1 | from mliv.utils import set_seed, cat
2 | from typing import NamedTuple, Dict, Any, Optional, List
3 | import numpy as np
4 | import torch
5 | from scipy.spatial.distance import cdist
6 | from sklearn.model_selection import train_test_split
7 | 
8 | example = '''
9 | from mliv.inference import KernelIV
10 | 
11 | model = KernelIV()
12 | model.fit(data)
13 | ITE = model.predict(data.train)
14 | ATE,_ = model.ATE(data.train)
15 | '''
16 | 
17 | ############################################## data_class.py #######################
18 | class TrainDataSet(NamedTuple):
19 |     treatment: np.ndarray
20 |     instrumental: np.ndarray
21 |     covariate: Optional[np.ndarray]
22 |     outcome: np.ndarray
23 |     structural: np.ndarray
24 | 
25 | class TestDataSet(NamedTuple):
26 |     treatment: np.ndarray
27 |     covariate: Optional[np.ndarray]
28 |     structural: np.ndarray
29 |     instrumental: Optional[np.ndarray]
30 |     outcome: Optional[np.ndarray]
31 | 
32 | class TrainDataSetTorch(NamedTuple):
33 |     treatment: torch.Tensor
34 |     instrumental: torch.Tensor
35 |     covariate: torch.Tensor
36 |     outcome: torch.Tensor
37 |     structural: torch.Tensor
38 | 
39 |     @classmethod
40 |     def from_numpy(cls, train_data: TrainDataSet):
41 |         covariate = None
42 |         if train_data.covariate is not None:
43 |             covariate = torch.tensor(train_data.covariate, dtype=torch.float32)
44 |         return TrainDataSetTorch(treatment=torch.tensor(train_data.treatment, dtype=torch.float32),
45 |                                  instrumental=torch.tensor(train_data.instrumental, dtype=torch.float32),
46 |                                  covariate=covariate,
47 |                                  outcome=torch.tensor(train_data.outcome, dtype=torch.float32),
48 |                                  structural=torch.tensor(train_data.structural, dtype=torch.float32))
49 | 
50 |     def to_gpu(self):
51 |         covariate = None
52 |         if self.covariate is not None:
53 |             covariate = self.covariate.cuda()
54 |         return TrainDataSetTorch(treatment=self.treatment.cuda(),
55 |                                  instrumental=self.instrumental.cuda(),
56 |                                  covariate=covariate,
57 |                                  outcome=self.outcome.cuda(),
58 |                                  structural=self.structural.cuda())
59 | 
60 | 
61 | class TestDataSetTorch(NamedTuple):
62 |     treatment: 
torch.Tensor 63 | instrumental: torch.Tensor 64 | covariate: torch.Tensor 65 | outcome: torch.Tensor 66 | structural: torch.Tensor 67 | 68 | @classmethod 69 | def from_numpy(cls, test_data: TestDataSet): 70 | covariate = None 71 | instrumental = None 72 | outcome = None 73 | if test_data.covariate is not None: 74 | covariate = torch.tensor(test_data.covariate, dtype=torch.float32) 75 | if test_data.instrumental is not None: 76 | instrumental = torch.tensor(test_data.instrumental, dtype=torch.float32) 77 | if test_data.outcome is not None: 78 | outcome = torch.tensor(test_data.outcome, dtype=torch.float32) 79 | return TestDataSetTorch(treatment=torch.tensor(test_data.treatment, dtype=torch.float32), 80 | covariate=covariate, 81 | instrumental=instrumental, 82 | outcome=outcome, 83 | structural=torch.tensor(test_data.structural, dtype=torch.float32)) 84 | def to_gpu(self): 85 | covariate = None 86 | instrumental = None 87 | outcome = None 88 | if self.covariate is not None: 89 | covariate = self.covariate.cuda() 90 | if self.instrumental is not None: 91 | instrumental = self.instrumental.cuda() 92 | if self.outcome is not None: 93 | outcome = self.outcome.cuda() 94 | return TestDataSetTorch(treatment=self.treatment.cuda(), 95 | covariate=covariate, 96 | instrumental=instrumental, 97 | outcome=outcome, 98 | structural=self.structural.cuda()) 99 | 100 | ################################# model.py ############################ 101 | class KernelIVModel: 102 | 103 | def __init__(self, X_train: np.ndarray, alpha: np.ndarray, sigma: float): 104 | """ 105 | 106 | Parameters 107 | ---------- 108 | X_train: np.ndarray[n_stage1, dim_treatment] 109 | data for treatment 110 | alpha: np.ndarray[n_stage1*n_stage2 ,dim_outcome] 111 | final weight for prediction 112 | sigma: gauss parameter 113 | """ 114 | self.X_train = X_train 115 | self.alpha = alpha 116 | self.sigma = sigma 117 | 118 | @staticmethod 119 | def cal_gauss(XA, XB, sigma: float = 1): 120 | """ 121 | Returns gaussian kernel matrix 122 | Parameters 123 | ---------- 124 | XA : np.ndarray[n_data1, n_dim] 125 | XB : np.ndarray[n_data2, n_dim] 126 | sigma : float 127 | 128 | Returns 129 | ------- 130 | mat: np.ndarray[n_data1, n_data2] 131 | """ 132 | dist_mat = cdist(XA, XB, "sqeuclidean") 133 | return np.exp(-dist_mat / sigma) 134 | 135 | def predict(self, treatment: np.ndarray, covariate: np.ndarray): 136 | X = np.array(treatment, copy=True) 137 | if covariate is not None: 138 | X = np.concatenate([X, covariate], axis=1) 139 | Kx = self.cal_gauss(X, self.X_train, self.sigma) 140 | return np.dot(Kx, self.alpha) 141 | 142 | def evaluate(self, test_data: TestDataSet): 143 | pred = self.predict(test_data.treatment, test_data.covariate) 144 | return np.mean((test_data.structural - pred)**2) 145 | 146 | ############## trainer.py ############## 147 | def get_median(X) -> float: 148 | dist_mat = cdist(X, X, "sqeuclidean") 149 | res: float = np.median(dist_mat) 150 | return res 151 | 152 | 153 | class KernelIVTrainer: 154 | 155 | def __init__(self, data_list: List, train_params: Dict[str, Any]): 156 | self.data_list = data_list 157 | 158 | self.lambda1 = train_params["lam1"] 159 | self.lambda2 = train_params["lam2"] 160 | self.split_ratio = train_params["split_ratio"] 161 | 162 | def split_train_data(self, train_data: TrainDataSet): 163 | n_data = train_data[0].shape[0] 164 | idx_train_1st, idx_train_2nd = train_test_split(np.arange(n_data), train_size=self.split_ratio) 165 | 166 | def get_data(data, idx): 167 | return data[idx] if data is not None else 
None 168 | 169 | train_1st_data = TrainDataSet(*[get_data(data, idx_train_1st) for data in train_data]) 170 | train_2nd_data = TrainDataSet(*[get_data(data, idx_train_2nd) for data in train_data]) 171 | return train_1st_data, train_2nd_data 172 | 173 | def train(self, rand_seed: int = 42, verbose: int = 0) -> float: 174 | """ 175 | 176 | Parameters 177 | ---------- 178 | rand_seed: int 179 | random seed 180 | verbose : int 181 | Determine the level of logging 182 | Returns 183 | ------- 184 | oos_result : float 185 | The performance of model evaluated by oos 186 | """ 187 | train_data = self.data_list[0] 188 | test_data = self.data_list[2] 189 | train_1st_data, train_2nd_data = self.split_train_data(train_data) 190 | 191 | # get stage1 data 192 | X1 = train_1st_data.treatment 193 | if train_1st_data.covariate is not None: 194 | X1 = np.concatenate([X1, train_1st_data.covariate], axis=-1) 195 | Z1 = train_1st_data.instrumental 196 | Y1 = train_1st_data.outcome 197 | N = X1.shape[0] 198 | 199 | # get stage2 data 200 | X2 = train_2nd_data.treatment 201 | if train_2nd_data.covariate is not None: 202 | X2 = np.concatenate([X2, train_2nd_data.covariate], axis=-1) 203 | Z2 = train_2nd_data.instrumental 204 | Y2 = train_2nd_data.outcome 205 | M = X2.shape[0] 206 | 207 | if verbose > 0: 208 | print("start stage1") 209 | 210 | sigmaX = get_median(X1) 211 | sigmaZ = get_median(Z1) 212 | KX1X1 = KernelIVModel.cal_gauss(X1, X1, sigmaX) 213 | KZ1Z1 = KernelIVModel.cal_gauss(Z1, Z1, sigmaZ) 214 | KZ1Z2 = KernelIVModel.cal_gauss(Z1, Z2, sigmaZ) 215 | KX1X2 = KernelIVModel.cal_gauss(X1, X2, sigmaX) 216 | 217 | if isinstance(self.lambda1, list): 218 | self.lambda1 = 10 ** np.linspace(self.lambda1[0], self.lambda1[1], 50) 219 | gamma = self.stage1_tuning(KX1X1, KX1X2, KZ1Z1, KZ1Z2) 220 | else: 221 | gamma = np.linalg.solve(KZ1Z1 + N * self.lambda1 * np.eye(N), KZ1Z2) 222 | W = KX1X1.dot(gamma) 223 | if verbose > 0: 224 | print("end stage1") 225 | print("start stage2") 226 | 227 | if isinstance(self.lambda2, list): 228 | self.lambda2 = 10 ** np.linspace(self.lambda2[0], self.lambda2[1], 50) 229 | alpha = self.stage2_tuning(W, KX1X1, Y1, Y2) 230 | else: 231 | alpha = np.linalg.solve(W.dot(W.T) + M * self.lambda2 * KX1X1, W.dot(Y2)) 232 | 233 | if verbose > 0: 234 | print("end stage2") 235 | 236 | mdl = KernelIVModel(X1, alpha, sigmaX) 237 | train_loss = mdl.evaluate(train_data) 238 | 239 | test_loss = mdl.evaluate(test_data) 240 | if verbose > 0: 241 | print(f"test_loss:{test_loss}") 242 | 243 | return train_loss, test_loss, mdl 244 | 245 | def stage1_tuning(self, KX1X1, KX1X2, KZ1Z1, KZ1Z2): 246 | N = KX1X1.shape[0] 247 | gamma_list = [np.linalg.solve(KZ1Z1 + N * lam1 * np.eye(N), KZ1Z2) for lam1 in self.lambda1] 248 | score = [np.trace(gamma.T.dot(KX1X1.dot(gamma)) - 2 * KX1X2.T.dot(gamma)) for gamma in gamma_list] 249 | self.lambda1 = self.lambda1[np.argmin(score)] 250 | return gamma_list[np.argmin(score)] 251 | 252 | def stage2_tuning(self, W, KX1X1, Y1, Y2): 253 | M = W.shape[1] 254 | b = W.dot(Y2) 255 | A = W.dot(W.T) 256 | alpha_list = [np.linalg.solve(A + M * lam2 * KX1X1, b) for lam2 in self.lambda2] 257 | score = [np.linalg.norm(Y1 - KX1X1.dot(alpha)) for alpha in alpha_list] 258 | self.lambda2 = self.lambda2[np.argmin(score)] 259 | return alpha_list[np.argmin(score)] 260 | 261 | class KernelIV(object): 262 | def __init__(self) -> None: 263 | self.config = { 264 | 'methodName': 'KernelIV', 265 | 'num': -1, 266 | 'lam1': [-2, -10], 267 | 'lam2': [-2, -10], 268 | 'split_ratio': 0.5, 269 | 'verbose': 1, 
270 |             'seed': 2022,
271 |         }
272 | 
273 |     def set_Configuration(self, config):
274 |         self.config = config
275 | 
276 |     def fit(self, data, exp=-1, config=None):
277 |         if config is None:
278 |             config = self.config
279 | 
280 |         set_seed(config['seed'])
281 |         data.numpy()
282 | 
283 |         num = config['num']
284 |         num = num if num > 0 else data.train.length
285 | 
286 |         train_config = {'lam1': config['lam1'],
287 |                         'lam2': config['lam2'],
288 |                         'split_ratio': config['split_ratio']}
289 |         verbose = config['verbose']
290 | 
291 |         train_data = TrainDataSet(treatment=data.train.t[:num],
292 |                                   instrumental=cat([data.train.z, data.train.x])[:num],
293 |                                   covariate=data.train.x[:num],
294 |                                   outcome=data.train.y[:num],
295 |                                   structural=data.train.g[:num])
296 |         val_data = TrainDataSet(treatment=data.valid.t,
297 |                                 instrumental=cat([data.valid.z, data.valid.x]),
298 |                                 covariate=data.valid.x,
299 |                                 outcome=data.valid.y,
300 |                                 structural=data.valid.g)
301 |         test_data = TestDataSet(treatment=data.test.t,
302 |                                 instrumental=cat([data.test.z, data.test.x]),
303 |                                 covariate=data.test.x,
304 |                                 outcome=data.test.y,
305 |                                 structural=data.test.g)
306 |         data_list = [train_data, val_data, test_data]
307 | 
308 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
309 | 
310 |         trainer = KernelIVTrainer(data_list, train_config)
311 |         train_loss, test_loss, mdl = trainer.train(rand_seed=42, verbose=verbose)
312 | 
313 |         print('End. ' + '-'*20)
314 | 
315 |         def estimation(data):
316 |             return mdl.predict(data.t-data.t, data.x), mdl.predict(data.t, data.x)
317 | 
318 |         self.data = data
319 |         self.mdl = mdl
320 |         self.estimation = estimation
321 | 
322 |     def predict(self, data=None, t=None, x=None):
323 |         if data is None:
324 |             data = self.data.test
325 | 
326 |         if x is None:
327 |             x = data.x
328 | 
329 |         if t is None:
330 |             t = data.t
331 | 
332 |         return self.mdl.predict(t,x)
333 | 
334 |     def ITE(self, data=None, t=None, x=None):
335 |         if data is None:
336 |             data = self.data.test
337 | 
338 |         if x is None:
339 |             x = data.x
340 | 
341 |         if t is None:
342 |             t = data.t
343 | 
344 |         ITE_0 = self.mdl.predict(t-t,x)
345 |         ITE_1 = self.mdl.predict(t-t+1,x)
346 |         ITE_t = self.mdl.predict(t,x)
347 | 
348 |         return ITE_0,ITE_1,ITE_t
349 | 
350 |     def ATE(self, data=None, t=None, x=None):
351 |         ITE_0,ITE_1,ITE_t = self.ITE(data,t,x)
352 | 
353 |         return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0)
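Before moving on to the two-stage-least-squares baselines, the closed-form core of `KernelIVTrainer.train` can be restated in a few lines. A minimal self-contained sketch; the toy arrays and the fixed lam1/lam2 are illustrative assumptions, while the kernel, the median-heuristic bandwidths, and the two np.linalg.solve calls mirror cal_gauss, get_median, and the stage-1/stage-2 formulas above:

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(0)
Z1 = rng.normal(size=(50, 1)); Z2 = rng.normal(size=(50, 1))      # instrument, split in two halves
X1 = Z1 + 0.1 * rng.normal(size=(50, 1))                          # stage-1 treatments
X2 = Z2 + 0.1 * rng.normal(size=(50, 1))                          # stage-2 treatments
Y2 = 2.0 * X2 + rng.normal(size=(50, 1))                          # stage-2 outcomes
lam1 = lam2 = 1e-3

gauss = lambda A, B, s: np.exp(-cdist(A, B, "sqeuclidean") / s)   # same kernel as cal_gauss
sX = np.median(cdist(X1, X1, "sqeuclidean"))                      # median heuristic, as in get_median
sZ = np.median(cdist(Z1, Z1, "sqeuclidean"))
N, M = X1.shape[0], X2.shape[0]

# stage 1: kernel ridge regression of the treatment features on the instrument
gamma = np.linalg.solve(gauss(Z1, Z1, sZ) + N * lam1 * np.eye(N), gauss(Z1, Z2, sZ))
W = gauss(X1, X1, sX) @ gamma
# stage 2: ridge regression of the held-out outcomes on the predicted features
alpha = np.linalg.solve(W @ W.T + M * lam2 * gauss(X1, X1, sX), W @ Y2)
# prediction at new treatments x is k(x, X1) @ alpha
y_hat = gauss(X2, X1, sX) @ alpha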
--------------------------------------------------------------------------------
/mliv/inference/twosls/__init__.py:
--------------------------------------------------------------------------------
1 | from .vanilla2sls_v1 import Vanilla2SLS
2 | from .poly2sls_v1 import Poly2SLS
3 | from .nn2sls_v1 import NN2SLS
--------------------------------------------------------------------------------
/mliv/inference/twosls/nn2sls_v1.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from mliv.utils import set_seed, cat
4 | from torch.utils.data import DataLoader
5 | import numpy as np
6 | 
7 | example = '''
8 | from mliv.inference import NN2SLS
9 | 
10 | model = NN2SLS()
11 | model.fit(data)
12 | ITE = model.predict(data.train)
13 | ATE,_ = model.ATE(data.train)
14 | '''
15 | 
16 | class NN2SLS(object):
17 |     def __init__(self) -> None:
18 |         self.config = {
19 |             'methodName': 'NN2SLS',
20 |             'device': 'cuda:0',
21 |             'instrumental_weight_decay': 0.0,
22 |             'covariate_weight_decay': 0.0,
23 |             'learning_rate': 0.005,
24 |             'verbose':1,
25 |             'show_per_epoch':5,
26 |             'lam2':0.1,
27 |             'epochs':100,
28 |             'batch_size':1000,
29 |             'seed': 2022
30 |         }
31 | 
32 |     def set_Configuration(self, config):
33 |         self.config = config
34 | 
35 |     def fit(self, data, exp=-1, config=None):
36 |         if config is None:
37 |             config = self.config
38 | 
39 |         self.z_dim = data.train.z.shape[1]
40 |         self.x_dim = data.train.x.shape[1]
41 |         self.t_dim = data.train.t.shape[1]
42 | 
43 |         self.device = config['device']
44 |         self.instrumental_weight_decay = config['instrumental_weight_decay']
45 |         self.covariate_weight_decay = config['covariate_weight_decay']
46 |         self.learning_rate = config['learning_rate']
47 | 
48 |         self.verbose = config['verbose']
49 |         self.show_per_epoch = config['show_per_epoch']
50 |         self.lam2 = config['lam2']
51 |         self.epochs = config['epochs']
52 |         self.batch_size = config['batch_size']
53 | 
54 |         self.build_net()
55 | 
56 |         set_seed(config['seed'])
57 |         data.tensor()
58 |         data.to(self.device)
59 |         self.data = data
60 | 
61 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
62 | 
63 |         self.train()
64 | 
65 |         print('End. ' + '-'*20)
66 | 
67 |     def build_net(self):
68 |         self.instrumental_net = nn.Sequential(nn.Linear(self.z_dim+self.x_dim, 1280),
69 |                                               nn.ReLU(),
70 |                                               nn.Linear(1280, 320),
71 |                                               nn.BatchNorm1d(320),
72 |                                               nn.ReLU(),
73 |                                               nn.Linear(320, 32),
74 |                                               nn.ReLU(),
75 |                                               nn.Linear(32, 1))
76 | 
77 |         self.covariate_net = nn.Sequential(nn.Linear(self.x_dim+self.t_dim, 1280),
78 |                                            nn.ReLU(),
79 |                                            nn.Linear(1280, 320),
80 |                                            nn.BatchNorm1d(320),
81 |                                            nn.ReLU(),
82 |                                            nn.Linear(320, 32),
83 |                                            nn.ReLU(),
84 |                                            nn.Linear(32, 1))
85 | 
86 |         self.instrumental_net.to(self.device)
87 |         self.covariate_net.to(self.device)
88 | 
89 |         self.instrumental_opt = torch.optim.Adam(self.instrumental_net.parameters(),lr=self.learning_rate,weight_decay=self.instrumental_weight_decay)
90 |         self.covariate_opt = torch.optim.Adam(self.covariate_net.parameters(),lr=self.learning_rate,weight_decay=self.covariate_weight_decay)
91 | 
92 |         self.loss_fn4t = torch.nn.MSELoss()
93 |         self.loss_fn4y = torch.nn.MSELoss()
94 | 
95 |     def train(self, verbose=None, show_per_epoch=None):
96 |         if verbose is None or show_per_epoch is None:
97 |             verbose, show_per_epoch = self.verbose, self.show_per_epoch
98 | 
99 |         self.lam2 *= self.data.train.length
100 | 
101 |         for exp in range(self.epochs):
102 |             self.instrumental_update(self.data.train, verbose)
103 | 
104 |             if verbose >= 1 and (exp % show_per_epoch == 0 or exp == self.epochs - 1):
105 |                 # log the first-stage fit on the train and valid splits
106 |                 train_t_hat = self.instrumental_net(cat([self.data.train.x,self.data.train.z])).detach()
107 |                 valid_t_hat = self.instrumental_net(cat([self.data.valid.x,self.data.valid.z])).detach()
108 | 
109 |                 loss_train = self.loss_fn4t(train_t_hat, self.data.train.t)
110 |                 loss_valid = self.loss_fn4t(valid_t_hat, self.data.valid.t)
111 | 
112 |                 print("Epoch {} ended: train - {:.4f}, valid - {:.4f}.".format(exp, loss_train, loss_valid))
113 | 
114 | 
115 |         for exp in range(self.epochs):
116 |             self.covariate_update(self.data.train, verbose)
117 | 
118 |             if verbose >= 1 and (exp % show_per_epoch == 0 or exp == self.epochs - 1):
119 |                 eval_train = self.evaluate(self.data.train)
120 |                 eval_valid = self.evaluate(self.data.valid)
121 |                 eval_test = self.evaluate(self.data.test)
122 | 
123 |                 print(f"Epoch {exp} ended:")
124 |                 print(f"Train: {eval_train}. ")
125 |                 print(f"Valid: {eval_valid}. ")
126 |                 print(f"Test : {eval_test}. 
") 127 | 128 | def get_loader(self, data=None): 129 | if data is None: 130 | data = self.train 131 | loader = DataLoader(data, batch_size=self.batch_size) 132 | return loader 133 | 134 | def instrumental_update(self, data, verbose): 135 | loader = self.get_loader(data) 136 | self.instrumental_net.train(True) 137 | 138 | for idx, inputs in enumerate(loader): 139 | x = inputs['x'].to(self.device) 140 | t = inputs['t'].to(self.device) 141 | z = inputs['z'].to(self.device) 142 | 143 | t_hat = self.instrumental_net(cat([x,z])) 144 | 145 | loss = self.loss_fn4t(t_hat, t) 146 | 147 | self.instrumental_opt.zero_grad() 148 | loss.backward() 149 | self.instrumental_opt.step() 150 | 151 | if verbose >= 2: 152 | print('Batch {} - loss: {:.4f}'.format(idx, loss)) 153 | 154 | self.instrumental_net.train(False) 155 | 156 | def covariate_update(self, data, verbose): 157 | loader = self.get_loader(data) 158 | self.covariate_net.train(True) 159 | 160 | for idx, inputs in enumerate(loader): 161 | x = inputs['x'].to(self.device) 162 | z = inputs['z'].to(self.device) 163 | y = inputs['y'].to(self.device) 164 | 165 | t_hat = self.instrumental_net(cat([x,z])) 166 | y_hat = self.covariate_net(cat([x,t_hat])) 167 | 168 | loss = self.loss_fn4y(y_hat, y) 169 | 170 | self.covariate_opt.zero_grad() 171 | loss.backward() 172 | self.covariate_opt.step() 173 | 174 | if verbose >= 2: 175 | print('Batch {} - loss: {:.4f}'.format(idx, loss)) 176 | 177 | self.covariate_net.train(False) 178 | 179 | def predict(self, data=None, t=None, x=None): 180 | if data is None: 181 | data = self.data.test 182 | 183 | if x is None: 184 | x = data.x 185 | 186 | if t is None: 187 | t = data.t 188 | 189 | return self.covariate_net(cat([x,t])).detach().cpu().numpy() 190 | 191 | def ITE(self, data=None, t=None, x=None): 192 | if data is None: 193 | data = self.data.test 194 | 195 | if x is None: 196 | x = data.x 197 | 198 | if t is None: 199 | t = data.t 200 | 201 | ITE_0 = self.covariate_net(cat([x,t-t])).detach().cpu().numpy() 202 | ITE_1 = self.covariate_net(cat([x,t-t+1])).detach().cpu().numpy() 203 | ITE_t = self.covariate_net(cat([x,t])).detach().cpu().numpy() 204 | 205 | return ITE_0,ITE_1,ITE_t 206 | 207 | def ATE(self, data=None, t=None, x=None): 208 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 209 | 210 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 211 | 212 | def estimation(self, data): 213 | self.covariate_net.train(False) 214 | 215 | y0_hat = self.covariate_net(cat([data.x,data.t-data.t])) 216 | yt_hat = self.covariate_net(cat([data.x,data.t])) 217 | 218 | return y0_hat, yt_hat 219 | 220 | def evaluate(self, data): 221 | y0_hat, yt_hat = self.estimation(data) 222 | 223 | loss_y = self.loss_fn4y(yt_hat, data.y) 224 | 225 | eval_str = 'loss_y: {:.4f}'.format(loss_y) 226 | return eval_str -------------------------------------------------------------------------------- /mliv/inference/twosls/poly2sls_v1.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import GridSearchCV 2 | from sklearn.linear_model import Ridge 3 | from sklearn.preprocessing import PolynomialFeatures 4 | from sklearn.pipeline import Pipeline 5 | from sklearn.linear_model import LinearRegression 6 | import numpy as np 7 | from mliv.utils import set_seed 8 | 9 | example = ''' 10 | from mliv.inference import Poly2SLS 11 | 12 | model = Poly2SLS() 13 | model.fit(data) 14 | ITE = model.predict(data.train) 15 | ATE,_ = model.ATE(data.train) 16 | ''' 17 | 18 | class Poly2SLS(object): 19 | def 
__init__(self) -> None: 20 | self.config = { 21 | 'methodName': 'Poly2SLS', 22 | 'seed': 2022 23 | } 24 | 25 | def set_Configuration(self, config): 26 | self.config = config 27 | 28 | def fit(self, data, exp=-1, config=None): 29 | if config is None: 30 | config = self.config 31 | 32 | set_seed(config['seed']) 33 | data.numpy() 34 | 35 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 36 | 37 | params = dict(poly__degree=range(1, 4), ridge__alpha=np.logspace(-5, 5, 11)) 38 | pipe = Pipeline([('poly', PolynomialFeatures()), ('ridge', Ridge())]) 39 | stage_1 = GridSearchCV(pipe, param_grid=params, cv=5) 40 | stage_1.fit(np.concatenate([data.train.z, 1-data.train.z, data.train.x], axis=1), data.train.t) 41 | t_hat = stage_1.predict(np.concatenate([data.train.z, 1-data.train.z, data.train.x], axis=1)) 42 | 43 | pipe2 = Pipeline([('poly', PolynomialFeatures()), ('ridge', Ridge())]) 44 | stage_2 = GridSearchCV(pipe2, param_grid=params, cv=5) 45 | stage_2.fit(np.concatenate([t_hat, data.train.x], axis=1), data.train.y) 46 | 47 | self.data = data 48 | self.stage_1 = stage_1 49 | self.stage_2 = stage_2 50 | 51 | print('End. ' + '-'*20) 52 | 53 | def estimation(data): 54 | return stage_2.predict(np.concatenate([data.t-data.t, data.x], axis=1)), stage_2.predict(np.concatenate([data.t, data.x], axis=1)) 55 | 56 | self.estimation = estimation 57 | 58 | def predict(self, data=None, t=None, x=None): 59 | if data is None: 60 | data = self.data.test 61 | 62 | if x is None: 63 | x = data.x 64 | 65 | if t is None: 66 | t = data.t 67 | 68 | return self.stage_2.predict(np.concatenate([t, x], axis=1)) 69 | 70 | def ITE(self, data=None, t=None, x=None): 71 | if data is None: 72 | data = self.data.test 73 | 74 | if x is None: 75 | x = data.x 76 | 77 | if t is None: 78 | t = data.t 79 | 80 | ITE_0 = self.stage_2.predict(np.concatenate([t-t, x], axis=1)) 81 | ITE_1 = self.stage_2.predict(np.concatenate([t-t+1, x], axis=1)) 82 | ITE_t = self.stage_2.predict(np.concatenate([t, x], axis=1)) 83 | 84 | return ITE_0,ITE_1,ITE_t 85 | 86 | def ATE(self, data=None, t=None, x=None): 87 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 88 | 89 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 90 | -------------------------------------------------------------------------------- /mliv/inference/twosls/vanilla2sls_v1.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LinearRegression 2 | import numpy as np 3 | from mliv.utils import set_seed 4 | 5 | example = ''' 6 | from mliv.inference import Vanilla2SLS 7 | 8 | model = Vanilla2SLS() 9 | model.fit(data) 10 | ITE = model.predict(data.train) 11 | ATE,_ = model.ATE(data.train) 12 | ''' 13 | 14 | class Vanilla2SLS(object): 15 | def __init__(self) -> None: 16 | self.config = { 17 | 'methodName': 'Vanilla2SLS', 18 | 'seed': 2022 19 | } 20 | 21 | def set_Configuration(self, config): 22 | self.config = config 23 | 24 | def fit(self, data, exp=-1, config=None): 25 | if config is None: 26 | config = self.config 27 | 28 | set_seed(config['seed']) 29 | data.numpy() 30 | 31 | print('Run {}-th experiment for {}. 
/mliv/inference/twosls/vanilla2sls_v1.py:
--------------------------------------------------------------------------------
1 | from sklearn.linear_model import LinearRegression
2 | import numpy as np
3 | from mliv.utils import set_seed
4 | 
5 | example = '''
6 | from mliv.inference import Vanilla2SLS
7 | 
8 | model = Vanilla2SLS()
9 | model.fit(data)
10 | y_hat = model.predict(data.train)
11 | ATE,_ = model.ATE(data.train)
12 | '''
13 | 
14 | class Vanilla2SLS(object):
15 |     def __init__(self) -> None:
16 |         self.config = {
17 |             'methodName': 'Vanilla2SLS',
18 |             'seed': 2022
19 |         }
20 | 
21 |     def set_Configuration(self, config):
22 |         self.config = config
23 | 
24 |     def fit(self, data, exp=-1, config=None):
25 |         if config is None:
26 |             config = self.config
27 | 
28 |         set_seed(config['seed'])
29 |         data.numpy()
30 | 
31 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
32 | 
33 |         stage_1 = LinearRegression()  # stage 1: regress t on (z, x)
34 |         stage_1.fit(np.concatenate([data.train.z, data.train.x], axis=1), data.train.t)
35 |         t_hat = stage_1.predict(np.concatenate([data.train.z, data.train.x], axis=1))
36 | 
37 |         stage_2 = LinearRegression()  # stage 2: regress y on (t_hat, x)
38 |         stage_2.fit(np.concatenate([t_hat, data.train.x], axis=1), data.train.y)
39 | 
40 |         self.data = data
41 |         self.stage_1 = stage_1
42 |         self.stage_2 = stage_2
43 | 
44 |         print('End. ' + '-'*20)
45 | 
46 |         def estimation(data):  # returns (y_hat at t=0, y_hat at the observed t)
47 |             return stage_2.predict(np.concatenate([data.t-data.t, data.x], axis=1)), stage_2.predict(np.concatenate([data.t, data.x], axis=1))
48 | 
49 |         self.estimation = estimation
50 | 
51 |     def predict(self, data=None, t=None, x=None):
52 |         if data is None:
53 |             data = self.data.test
54 | 
55 |         if x is None:
56 |             x = data.x
57 | 
58 |         if t is None:
59 |             t = data.t
60 | 
61 |         return self.stage_2.predict(np.concatenate([t, x], axis=1))
62 | 
63 |     def ITE(self, data=None, t=None, x=None):
64 |         if data is None:
65 |             data = self.data.test
66 | 
67 |         if x is None:
68 |             x = data.x
69 | 
70 |         if t is None:
71 |             t = data.t
72 | 
73 |         ITE_0 = self.stage_2.predict(np.concatenate([t-t, x], axis=1))
74 |         ITE_1 = self.stage_2.predict(np.concatenate([t-t+1, x], axis=1))
75 |         ITE_t = self.stage_2.predict(np.concatenate([t, x], axis=1))
76 | 
77 |         return ITE_0, ITE_1, ITE_t
78 | 
79 |     def ATE(self, data=None, t=None, x=None):
80 |         ITE_0, ITE_1, ITE_t = self.ITE(data, t, x)
81 | 
82 |         return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0)
--------------------------------------------------------------------------------
/mliv/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .loaddata import CausalDataset, cat, split
2 | from .setenv import set_cuda, set_seed, set_tf_seed, get_device
--------------------------------------------------------------------------------
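The loader that follows reads train.csv / valid.csv / test.csv and groups columns by their first character ('x0', 'x1' become data.x, 't0' becomes data.t, and so on). A sketch of a conforming file (illustrative values; the path is hypothetical):

import os
import numpy as np
import pandas as pd

os.makedirs('./Data/toy', exist_ok=True)   # hypothetical location
n = 100
df = pd.DataFrame({
    'z0': np.random.randn(n),  # instrument (aliased to data.i when no 'i' columns exist)
    'x0': np.random.randn(n),  # covariates
    'x1': np.random.randn(n),
    't0': np.random.randn(n),  # treatment
    'y0': np.random.randn(n),  # outcome
})
df.to_csv('./Data/toy/train.csv', index=False)  # CausalDataset also expects valid.csv and test.csv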
/mliv/utils/loaddata.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import copy
3 | import numpy as np
4 | try:
5 |     import torch
6 |     from torch.utils.data import Dataset
7 | except ImportError:
8 |     print('No module named torch. Please install it with `pip install torch`.')
9 | 
10 | def get_var_df(df, var):
11 |     var_cols = [c for c in df.columns if c.startswith(var)]
12 |     return df[var_cols].to_numpy()
13 | 
14 | def cat(data_list, axis=1):
15 |     try:
16 |         output = torch.cat(data_list, axis)
17 |     except:
18 |         output = np.concatenate(data_list, axis)
19 | 
20 |     return output
21 | 
22 | def split(data, split_ratio=0.5):
23 |     data1 = copy.deepcopy(data)
24 |     data2 = copy.deepcopy(data)
25 | 
26 |     split_num = int(data.length * split_ratio)
27 |     data1.split(0, split_num)
28 |     data2.split(split_num, data.length)
29 | 
30 |     return data1, data2
31 | 
32 | class CausalDataset(object):
33 |     def __init__(self, path):
34 |         self.path = path
35 |         self.train = getDataset(pd.read_csv(path + 'train.csv'))
36 |         self.valid = getDataset(pd.read_csv(path + 'valid.csv'))
37 |         self.test = getDataset(pd.read_csv(path + 'test.csv'))
38 | 
39 |     def split(self, split_ratio=0.5, data=None):
40 |         if data is None:
41 |             data = self.train
42 | 
43 |         data1, data2 = split(data, split_ratio)
44 |         self.data1 = data1
45 |         self.data2 = data2
46 | 
47 |     def get_train(self):
48 |         return self.train
49 | 
50 |     def get_valid(self):
51 |         return self.valid
52 | 
53 |     def get_test(self):
54 |         return self.test
55 | 
56 |     def get_data(self):
57 |         return self.train, self.valid, self.test
58 | 
59 |     def tensor(self):
60 |         self.train.tensor()
61 |         self.valid.tensor()
62 |         self.test.tensor()
63 | 
64 |     def double(self):
65 |         self.train.double()
66 |         self.valid.double()
67 |         self.test.double()
68 | 
69 |     def float(self):
70 |         self.train.float()
71 |         self.valid.float()
72 |         self.test.float()
73 | 
74 |     def detach(self):
75 |         self.train.detach()
76 |         self.valid.detach()
77 |         self.test.detach()
78 | 
79 |     def to(self, device='cpu'):
80 |         self.train.to(device)
81 |         self.valid.to(device)
82 |         self.test.to(device)
83 | 
84 |     def cpu(self):
85 |         self.train.cpu()
86 |         self.valid.cpu()
87 |         self.test.cpu()
88 | 
89 |     def numpy(self):
90 |         self.train.numpy()
91 |         self.valid.numpy()
92 |         self.test.numpy()
93 | 
94 | class TorchDataset(Dataset):
95 |     def __init__(self, data, device='cpu', type='tensor'):
96 |         if type == 'tensor':
97 |             data.tensor()
98 |         else:
99 |             data.double()
100 |         data.to(device)
101 | 
102 |         self.data = data
103 | 
104 |     def __getitem__(self, idx):
105 |         var_dict = {}
106 |         for var in self.data.Vars:
107 |             exec(f'var_dict[\'{var}\']=self.data.{var}[idx]')  # the variables live on the wrapped dataset, not on this class
108 | 
109 |         return var_dict
110 | 
111 |     def __len__(self):
112 |         return self.data.length
113 | 
114 | class getDataset(Dataset):
115 |     def __init__(self, df):
116 |         self.length = len(df)
117 |         self.Vars = list(set([col[0] for col in df.columns]))  # variable name = first character of the column name ('x0','x1' -> 'x')
118 | 
119 |         for var in self.Vars:
120 |             exec(f'self.{var}=get_var_df(df, \'{var}\')')
121 | 
122 |         if not hasattr(self, 'i'):
123 |             self.i = self.z  # fall back to the instrument columns when no 'i' columns exist
124 |             self.Vars.append('i')
125 | 
126 |     def split(self, start, end):
127 |         for var in self.Vars:
128 |             try:
129 |                 exec(f'self.{var} = self.{var}[start:end]')
130 |             except:
131 |                 pass
132 | 
133 |         self.length = end - start
134 | 
135 |     def cpu(self):  # the converters below stop at the first attribute that cannot be converted
136 |         for var in self.Vars:
137 |             try:
138 |                 exec(f'self.{var} = self.{var}.cpu()')
139 |             except:
140 |                 break
141 | 
142 |     def cuda(self, n=0):
143 |         for var in self.Vars:
144 |             try:
145 |                 exec(f'self.{var} = self.{var}.cuda({n})')
146 |             except:
147 |                 break
148 | 
149 |     def to(self, device='cpu'):
150 |         for var in self.Vars:
151 |             try:
152 |                 exec(f'self.{var} = self.{var}.to(\'{device}\')')
153 |             except:
154 |                 break
155 | 
156 |     def tensor(self):
157 |         for var in self.Vars:
158 |             try:
159 |                 exec(f'self.{var} = torch.Tensor(self.{var})')
160 |             except:
161 |                 break
162 | 
163 |     def float(self):
164 |         for var in self.Vars:
165 |             try:
166 |                 exec(f'self.{var} = torch.Tensor(self.{var}).float()')
167 |             except:
168 |                 break
169 | 
170 |     def double(self):
171 |         for var in self.Vars:
172 |             try:
173 |                 exec(f'self.{var} = torch.Tensor(self.{var}).double()')
174 |             except:
175 |                 break
176 | 
177 |     def detach(self):
178 |         for var in self.Vars:
179 |             try:
180 |                 exec(f'self.{var} = self.{var}.detach()')
181 |             except:
182 |                 break
183 | 
184 |     def numpy(self):
185 |         try:
186 |             self.detach()
187 |         except:
188 |             pass
189 | 
190 |         try:
191 |             self.cpu()
192 |         except:
193 |             pass
194 | 
195 |         for var in self.Vars:
196 |             try:
197 |                 exec(f'self.{var} = self.{var}.numpy()')
198 |             except:
199 |                 break
200 | 
201 |     def pandas(self, path=None):  # flatten all variables back into a single DataFrame ('x' -> 'x0','x1',...)
202 |         var_list = []
203 |         var_dims = []
204 |         var_name = []
205 |         for var in self.Vars:
206 |             exec(f'var_list.append(self.{var})')
207 |             exec(f'var_dims.append(self.{var}.shape[1])')
208 |         for i in range(len(self.Vars)):
209 |             for d in range(var_dims[i]):
210 |                 var_name.append(self.Vars[i] + str(d))
211 |         df = pd.DataFrame(np.concatenate(var_list, axis=1), columns=var_name)
212 | 
213 |         if path is not None:
214 |             df.to_csv(path, index=False)
215 |         return df
216 | 
217 |     def __getitem__(self, idx):
218 |         var_dict = {}
219 |         for var in self.Vars:
220 |             exec(f'var_dict[\'{var}\']=self.{var}[idx]')
221 | 
222 |         return var_dict
223 | 
224 |     def __len__(self):
225 |         return self.length
--------------------------------------------------------------------------------
/mliv/utils/setenv.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import argparse
4 | import os
5 | from numba import cuda
6 | 
7 | try:
8 |     import torch
9 | except ImportError:
10 |     pass
11 | try:
12 |     import tensorflow as tf
13 | except ImportError:
14 |     pass
15 | 
16 | def clear_cache():
17 |     try:
18 |         if torch.cuda.is_available():
19 |             cuda.select_device(0)
20 |             cuda.close()
21 |     except:
22 |         pass
23 | 
24 | def set_cuda(CUDA='3'):
25 |     os.environ["CUDA_VISIBLE_DEVICES"] = CUDA if isinstance(CUDA, str) else str(CUDA)
26 |     os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
27 | 
28 | def set_seed(seed=2021):  # assumes torch is installed; seeds numpy, random, and torch
29 |     np.random.seed(seed)
30 |     random.seed(seed)
31 |     os.environ['PYTHONHASHSEED'] = str(seed)
32 | 
33 |     torch.manual_seed(seed)
34 |     torch.cuda.manual_seed_all(seed)
35 |     torch.cuda.manual_seed(seed)
36 | 
37 |     torch.backends.cudnn.deterministic = True
38 |     torch.backends.cudnn.benchmark = False
39 | 
40 | def set_tf_seed(seed=2021):
41 |     np.random.seed(seed)
42 |     random.seed(seed)
43 |     os.environ['PYTHONHASHSEED'] = str(seed)
44 | 
45 |     tf.compat.v1.reset_default_graph()  # use the compat API so this also works under TF2
46 |     tf.compat.v1.set_random_seed(seed)
47 | 
48 | def get_device(GPU=True):
49 |     device = torch.device('cuda' if torch.cuda.is_available() and GPU else "cpu")
50 |     if torch.cuda.is_available():
51 |         torch.cuda.empty_cache()
52 | 
53 |     return device
--------------------------------------------------------------------------------
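A typical environment-setup sequence with the helpers above (a sketch; set_cuda should run before torch initializes CUDA so the visible-device pin takes effect):

from mliv.utils import set_cuda, set_seed, get_device

set_cuda('0')          # pins CUDA_VISIBLE_DEVICES
set_seed(2022)         # seeds numpy/random/torch and makes cudnn deterministic
device = get_device()  # torch.device('cuda') if available, else 'cpu'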
url="https://github.com/anpwu/mliv.git", # 模块github地址 15 | packages=setuptools.find_packages(), # 自动找到项目中导入的模块 16 | # 模块相关的元数据(更多的描述) 17 | classifiers=[ 18 | "Programming Language :: Python", 19 | "License :: OSI Approved :: Apache Software License", 20 | "Operating System :: OS Independent", 21 | ], 22 | # 依赖模块 23 | install_requires=[ 24 | 'argparse', 25 | 'pillow', 26 | 'numba', 27 | 'cvxopt', 28 | ], 29 | # python版本 30 | python_requires=">=3.7", 31 | ) --------------------------------------------------------------------------------