├── .gitignore ├── LICENSE ├── README.md ├── demos.py ├── mliv ├── __init__.py ├── dataset │ ├── __init__.py │ └── demand │ │ ├── __init__.py │ │ └── demand_v1.py ├── inference │ ├── __init__.py │ ├── deep │ │ ├── __init__.py │ │ ├── deepiv_v1.py │ │ └── dfiv_v1.py │ ├── dflearning │ │ ├── __init__.py │ │ └── dfl_v1.py │ ├── gmm │ │ ├── __init__.py │ │ ├── agmm_v1 │ │ │ ├── __init__.py │ │ │ ├── net.py │ │ │ ├── oadam.py │ │ │ ├── rbflayer.py │ │ │ └── trainer.py │ │ └── deepgmm_v1 │ │ │ ├── __init__.py │ │ │ ├── dataclass.py │ │ │ ├── model.py │ │ │ ├── nn_structure │ │ │ ├── __init__.py │ │ │ ├── nn_structure_for_demand_image.py │ │ │ ├── nn_structure_for_demand_old.py │ │ │ ├── nn_structure_for_dsprite.py │ │ │ └── nn_structure_for_sin.py │ │ │ ├── trainer.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── custom_logging.py │ │ │ └── pytorch_linear_reg_utils.py │ ├── onestage │ │ ├── __init__.py │ │ └── onesiv_v1.py │ ├── sieve │ │ ├── __init__.py │ │ ├── dualiv_v1.py │ │ └── kerneliv_v1.py │ └── twosls │ │ ├── __init__.py │ │ ├── nn2sls_v1.py │ │ ├── poly2sls_v1.py │ │ └── vanilla2sls_v1.py └── utils │ ├── __init__.py │ ├── loaddata.py │ └── setenv.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # about ignore 2 | *Results/ 3 | *Result/ 4 | *results/ 5 | *result/ 6 | *Data/ 7 | *data/ 8 | *run/ 9 | *ckpts/ 10 | *build/ 11 | *dist/ 12 | *mliv.egg-info/ 13 | *cache/ 14 | *miniImageNet_WRN_60Epoch/ 15 | 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | 21 | # C extensions 22 | *.so 23 | 24 | # Distribution / packaging 25 | .Python 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | pip-wheel-metadata/ 39 | share/python-wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | MANIFEST 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .nox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | *.py,cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | local_settings.py 76 | db.sqlite3 77 | db.sqlite3-journal 78 | 79 | # Flask stuff: 80 | instance/ 81 | .webassets-cache 82 | 83 | # Scrapy stuff: 84 | .scrapy 85 | 86 | # Sphinx documentation 87 | docs/_build/ 88 | 89 | # PyBuilder 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | .python-version 101 | 102 | # pipenv 103 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 104 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 105 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 106 | # install all needed dependencies. 107 | #Pipfile.lock 108 | 109 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 110 | __pypackages__/ 111 | 112 | # Celery stuff 113 | celerybeat-schedule 114 | celerybeat.pid 115 | 116 | # SageMath parsed files 117 | *.sage.py 118 | 119 | # Environments 120 | .env 121 | .venv 122 | env/ 123 | venv/ 124 | ENV/ 125 | env.bak/ 126 | venv.bak/ 127 | 128 | # Spyder project settings 129 | .spyderproject 130 | .spyproject 131 | 132 | # Rope project settings 133 | .ropeproject 134 | 135 | # mkdocs documentation 136 | /site 137 | 138 | # mypy 139 | .mypy_cache/ 140 | .dmypy.json 141 | dmypy.json 142 | 143 | # Pyre type checker 144 | .pyre/ 145 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 causal-machine-learning-lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mliv
2 | 
3 | Machine-learning estimators for instrumental-variable (IV) regression that share a common `config` / `fit` / `predict` interface.
4 | ```python
5 | from mliv.dataset.demand import gen_data
6 | from mliv.utils import CausalDataset
7 | gen_data()
8 | data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')
9 | 
10 | from mliv.inference import Vanilla2SLS
11 | from mliv.inference import Poly2SLS
12 | from mliv.inference import NN2SLS
13 | from mliv.inference import OneSIV
14 | from mliv.inference import KernelIV
15 | from mliv.inference import DualIV
16 | from mliv.inference import DFL
17 | from mliv.inference import AGMM
18 | from mliv.inference import DeepGMM
19 | from mliv.inference import DFIV
20 | from mliv.inference import DeepIV # Tensorflow & keras
21 | 
22 | for mod in [OneSIV,KernelIV,DualIV,DFL,AGMM,DeepGMM,DFIV,Vanilla2SLS,Poly2SLS,NN2SLS]:
23 |     model = mod()
24 |     model.config['num'] = 100
25 |     model.config['epochs'] = 10
26 |     model.fit(data)
27 | 
28 |     print(mod)
29 | 
30 | 
31 | try:
32 |     model = DeepIV()
33 |     model.config['num'] = 100
34 |     model.config['epochs'] = 10
35 |     model.fit(data)
36 | 
37 |     print(DeepIV)
38 | except Exception:
39 |     print('Error: DeepIV is implemented in TensorFlow/Keras and requires a working installation.')
40 | 
41 | 
42 | ```
--------------------------------------------------------------------------------
/demos.py:
--------------------------------------------------------------------------------
1 | from mliv.dataset.demand import gen_data
2 | from mliv.utils import CausalDataset
3 | gen_data()
4 | data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')
5 | 
6 | from mliv.inference import Vanilla2SLS
7 | from mliv.inference import Poly2SLS
8 | from mliv.inference import NN2SLS
9 | from mliv.inference import OneSIV
10 | from mliv.inference import KernelIV
11 | from mliv.inference import DualIV
12 | from mliv.inference import DFL
13 | from mliv.inference import AGMM
14 | from mliv.inference import DeepGMM
15 | from mliv.inference import DFIV
16 | try:
17 |     from mliv.inference import DeepIV
18 | except Exception:
19 |     pass
20 | 
21 | for mod in [OneSIV,KernelIV,DualIV,DFL,AGMM,DeepGMM,DFIV,Vanilla2SLS,Poly2SLS,NN2SLS]:
22 | 
23 |     try:
24 |         model = mod()
25 |         model.config['num'] = 100
26 |         model.config['epochs'] = 10
27 |         model.fit(data)
28 | 
29 |         print(mod)
30 |     except Exception:
31 |         print(f'Error: {mod} failed to run.')
32 | 
33 | try:
34 |     model = DeepIV()
35 |     model.config['num'] = 100
36 |     model.config['epochs'] = 10
37 |     model.fit(data)
38 | 
39 |     print(DeepIV)
40 | except Exception:
41 |     print('Error: DeepIV is implemented in TensorFlow/Keras and requires a working installation.')
42 | 
--------------------------------------------------------------------------------
/mliv/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) causal-machine-learning-lab. All rights reserved.
2 | # Licensed under the MIT License.
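# All estimators exposed by mliv.inference share one workflow (see README.md and
# demos.py): instantiate, adjust `model.config`, then call `fit(data)`.
# A minimal sketch, assuming the Demand data generated as in the README:
#
#     from mliv.dataset.demand import gen_data
#     from mliv.utils import CausalDataset
#     from mliv.inference import DFIV
#
#     gen_data()
#     data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')
#     model = DFIV()
#     model.config['epochs'] = 10
#     model.fit(data)
#     ATE, _ = model.ATE(data.train)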
3 | 
4 | name = "mliv"
5 | __version__ = "0.0.1"
6 | __all__ = [
7 |     "dataset",
8 |     "inference",
9 |     "utils",
10 | ]
--------------------------------------------------------------------------------
/mliv/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/causal-machine-learning-lab/mliv/58c45ae08888b97d470a1837205cf35bc34b03d2/mliv/dataset/__init__.py
--------------------------------------------------------------------------------
/mliv/dataset/demand/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | from .demand_v1 import generate_Demand_train, generate_Demand_test, set_Configuration, config
4 | 
5 | example = '''
6 | from mliv.dataset.demand import gen_data
7 | from mliv.utils import CausalDataset
8 | gen_data()
9 | data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')
10 | '''
11 | 
12 | def gen_data(config=config):
13 |     config, config_trt, config_val, config_tst = set_Configuration(config)
14 |     exps = config['exps']
15 |     dataName = config['dataName']
16 |     path = './Data/{}/{}_{}_{}_{}/'.format(config['dataName'],config['rho'],config['alpha'],config['beta'],config['num'])
17 |     print(f'The path: {path}')
18 | 
19 |     for exp in range(exps):
20 | 
21 |         print(f'Generate {dataName} datasets - {exp}/{exps}. ')
22 | 
23 |         # derive per-experiment seeds from the base seeds in `config`
24 |         config_trt['seed'], config_val['seed'], config_tst['seed'] = config['seed'] + exp*333, config['seed_val'] + exp*444, config['seed_tst'] + exp*555
25 |         train = generate_Demand_train(**config_trt)
26 |         valid = generate_Demand_train(**config_val)
27 |         test = generate_Demand_test(**config_tst)
28 | 
29 |         data_path = path + '{}/'.format(exp)
30 |         os.makedirs(os.path.dirname(data_path), exist_ok=True)
31 | 
32 |         train.to_csv(data_path + 'train.csv', index=False)
33 |         valid.to_csv(data_path + 'valid.csv', index=False)
34 |         test.to_csv(data_path + 'test.csv', index=False)
35 | 
36 |         configs = {'config':config, 'config_trt':config_trt, 'config_val':config_val, 'config_tst':config_tst}
37 |         with open(data_path + "configs.json", "w") as file:
38 |             file.write( json.dumps(configs) )
39 | 
40 |     return config
--------------------------------------------------------------------------------
/mliv/dataset/demand/demand_v1.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | import numpy as np
3 | from numpy.random import default_rng
4 | from pandas import DataFrame
5 | import os
6 | 
7 | np.random.seed(42)
8 | 
9 | example = '''
10 | from mliv.dataset.demand.demand_v1 import generate_Demand_train, generate_Demand_test, set_Configuration
11 | 
12 | config, config_trt, config_val, config_tst = set_Configuration()
13 | 
14 | train = generate_Demand_train(**config_trt)
15 | valid = generate_Demand_train(**config_val)
16 | test = generate_Demand_test(**config_tst)
17 | '''
18 | 
19 | config = {
20 |     'dataName': 'Demand',
21 |     'exps': 10,
22 |     'num': 10000,
23 |     'rho': 0.5,
24 |     'alpha': 1.0,
25 |     'beta': 0.0,
26 |     'seed': 2022,
27 |     'num_val': 10000,
28 |     'seed_val': 3033,
29 |     'seed_tst': 4044
30 | }
31 | 
32 | def set_Configuration(config=config):
33 |     config_trt = {}
34 |     keys_trt = ['num', 'rho', 'alpha', 'beta', 'seed']
35 |     for key in keys_trt:
36 |         config_trt[key] = config[key]
37 | 
38 |     config_val = {}
39 |     keys_val = ['rho', 'alpha', 'beta']
40 |     for key in keys_val:
41 |         config_val[key] = config[key]
42 |     config_val['num'] = config['num_val']
43 |     config_val['seed'] = config['seed_val']
44 | 
45 |     config_tst = {}
46 |     keys_tst = ['rho', 'alpha', 'beta']
47 |     for key in keys_tst:
48 |         config_tst[key] = config[key]
49 |     config_tst['seed'] = config['seed_tst']
50 | 
51 |     return config, config_trt, config_val, config_tst
52 | 
53 | def h(t):
54 |     return 2 * ((t - 5) ** 4 / 600 + np.exp(-4 * (t - 5) ** 2) + t / 10 - 2)
55 | 
56 | def f(p, t, s):
57 |     return 100 + (10 + p) * s * h(t) - 2 * p
58 | 
59 | def generate_Demand_train(num=10000, rho=0.5, alpha=1, beta=0, seed=2021):
60 | 
61 |     rng = default_rng(seed)
62 | 
63 |     emotion = rng.choice(list(range(1, 8)), (num,1))
64 |     time = rng.uniform(0, 10, (num,1))
65 |     cost = rng.normal(0, 1.0, (num,1))
66 |     noise_price = rng.normal(0, 1.0, (num,1))
67 |     noise_demand = rho * noise_price + rng.normal(0, np.sqrt(1 - rho ** 2), (num,1))
68 |     price = 25 + (alpha * cost + 3) * h(time) + beta * cost + noise_price
69 |     structural = f(price, time, emotion).astype(float)
70 |     outcome = (structural + noise_demand).astype(float)
71 | 
72 |     mu0 = f(price-price, time, emotion).astype(float)  # structural outcome at price 0
73 |     mut = structural
74 | 
75 |     numpys = [noise_price,noise_demand, cost, time, emotion, time, emotion, price, mu0, mut, structural, outcome]
76 | 
77 |     train_data = DataFrame(np.concatenate(numpys, axis=1),
78 |                            columns=['u1','u2','z1','x1','x2','c1','a1','t1','m0','mt','g1','y1'])
79 | 
80 |     return train_data
81 | 
82 | def generate_Demand_test(rho=0.5, alpha=1, beta=0, seed=2021):
83 | 
84 |     rng = default_rng(seed)
85 | 
86 |     noise_price = rng.normal(0, 1.0, (2800,1))  # 2800 = 20 cost x 20 time x 7 emotion grid points
87 |     noise_demand = rho * noise_price + rng.normal(0, np.sqrt(1 - rho ** 2), (2800,1))
88 | 
89 |     cost = np.linspace(-1.0, 1.0, 20)
90 |     time = np.linspace(0.0, 10, 20)
91 |     emotion = np.array([1, 2, 3, 4, 5, 6, 7])
92 | 
93 |     data = []
94 |     price_z = []
95 |     for c, t, s in product(cost, time, emotion):
96 |         data.append([c, t, s])
97 |         price_z.append(25 + (alpha * c + 3) * h(t) + beta * c)
98 |     features = np.array(data)
99 |     price_z = np.array(price_z)[:, np.newaxis]
100 |     price = price_z + noise_price
101 | 
102 |     structural = f(price, features[:,1:2], features[:,2:3]).astype(float)
103 |     outcome = (structural + noise_demand).astype(float)
104 | 
105 |     mu0 = f(price-price, features[:,1:2], features[:,2:3]).astype(float)  # structural outcome at price 0
106 |     mut = structural
107 | 
108 |     numpys = [noise_price, noise_demand, features, features[:,1:3], price, mu0, mut, structural, outcome]
109 | 
110 |     test_data = DataFrame(np.concatenate(numpys, axis=1),
111 |                           columns=['u1','u2','z1','x1','x2','c1','a1','t1','m0','mt','g1','y1'])
112 | 
113 |     return test_data
--------------------------------------------------------------------------------
/mliv/inference/__init__.py:
--------------------------------------------------------------------------------
1 | from .twosls import Vanilla2SLS, Poly2SLS, NN2SLS
2 | from .dflearning import DFL
3 | from .onestage import OneSIV
4 | from .sieve import KernelIV, DualIV
5 | from .gmm import AGMM, DeepGMM
6 | from .deep import DFIV
7 | try:
8 |     from .deep import DeepIV
9 | except Exception:
10 |     pass
--------------------------------------------------------------------------------
/mliv/inference/deep/__init__.py:
--------------------------------------------------------------------------------
1 | from .dfiv_v1 import DFIV
2 | try:
3 |     from .deepiv_v1 import DeepIV
4 | except Exception:
5 |     pass
--------------------------------------------------------------------------------
/mliv/inference/deep/deepiv_v1.py:
--------------------------------------------------------------------------------
1 | # from __future__ import absolute_import, division, print_function, unicode_literals
2 | 
3 | import keras
4 | import types
5 | import random
6 | import warnings
7 | import numpy as np
8 | import tensorflow as tf
9 | from mliv.utils import set_seed
10 | from keras import backend as K
11 | from keras.layers import Input, Dense, Dropout, Lambda
12 | from keras.models import Model
13 | from keras.layers.merge import Concatenate
14 | from keras.backend import clear_session
15 | from keras.optimizers import clip_norm  # used by get_gradients below (ships with standalone Keras 2.x)
16 | from keras.constraints import maxnorm
17 | from sklearn.decomposition import PCA
18 | from sklearn import linear_model
19 | 
20 | 
21 | if K.backend() == "theano":
22 |     from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
23 |     import theano; import theano.tensor as tensor
24 |     _FLOATX = theano.config.floatX
25 |     Lop = tensor.Lop
26 | elif K.backend() == "tensorflow":
27 |     def Lop(output, wrt, eval_points):
28 |         grads = tf.gradients(output, wrt, grad_ys=eval_points)
29 |         return grads
30 | 
31 | example = '''
32 | from mliv.inference import DeepIV
33 | 
34 | model = DeepIV()
35 | model.fit(data)
36 | ITE = model.predict(data.train)
37 | ATE,_ = model.ATE(data.train)
38 | '''
39 | 
40 | ############ from DeepIV_True.custom_gradients import replace_gradients_mse
41 | 
42 | def get_gradients(self, loss, params):
43 |     '''
44 |     Replacement for the default keras get_gradients() function.
45 |     Modification: checks if the object has the attribute grads and
46 |     returns that rather than calculating the gradients using automatic
47 |     differentiation.
48 |     '''
49 |     if hasattr(self, 'grads'):
50 |         grads = self.grads
51 |     else:
52 |         grads = K.gradients(loss, params)
53 |     if hasattr(self, 'clipnorm') and self.clipnorm > 0:
54 |         norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
55 |         grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
56 |     if hasattr(self, 'clipvalue') and self.clipvalue > 0:
57 |         grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
58 |     return grads
59 | 
60 | def replace_gradients_mse(model, opt, batch_size, n_samples = 1):
61 |     '''
62 |     Replace the gradients of a Keras model with mean square error loss.
63 |     '''
64 |     # targets has been repeated twice so the below creates two identical columns
65 |     # of the target values - we'll only use the first column.
66 |     targets = K.reshape(model.targets[0], (batch_size, n_samples * 2))
67 |     output = K.mean(K.reshape(model.outputs[0], (batch_size, n_samples, 2)), axis=1)
68 |     # compute d Loss / d output
69 |     dL_dOutput = (output[:,0] - targets[:,0]) * (2.) / batch_size
70 |     # compute (d Loss / d output) (d output / d theta) for each theta
71 |     trainable_weights = model.trainable_weights
72 |     grads = Lop(output[:,1], wrt=trainable_weights, eval_points=dL_dOutput)
73 |     # compute regularizer gradients
74 | 
75 |     # add loss with respect to regularizers
76 |     reg_loss = model.total_loss * 0.
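    # `model.total_loss * 0.` builds a zero tensor in the same graph and dtype as
    # the loss; the loop below accumulates the model's regularization terms onto
    # it so their gradients can be added to the custom MSE gradients.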
77 |     for r in model.losses:
78 |         reg_loss += r
79 |     reg_grads = K.gradients(reg_loss, trainable_weights)
80 |     grads = [g+r for g,r in zip(grads, reg_grads)]
81 | 
82 |     opt = keras.optimizers.get(opt)
83 |     # Patch keras gradient calculation to allow for user defined gradients
84 |     opt.get_gradients = types.MethodType( get_gradients, opt )
85 |     opt.grads = grads
86 |     model.optimizer = opt
87 |     return model
88 | 
89 | ######### import DeepIV_True.densities as densities
90 | 
91 | def split(start, stop):
92 |     return Lambda(lambda x: x[:, start:stop], output_shape=(None, stop-start))
93 | 
94 | def split_mixture_of_gaussians(x, n_components):
95 |     pi = split(0, n_components)(x)
96 |     mu = split(n_components, 2*n_components)(x)
97 |     log_sig = split(2*n_components, 3*n_components)(x)
98 |     return pi, mu, log_sig
99 | 
100 | def log_norm_pdf(x, mu, log_sig):
101 |     z = (x - mu) / (K.exp(K.clip(log_sig, -40, 40))) #TODO: get rid of this clipping
102 |     return -(0.5)*K.log(2*np.pi) - log_sig - 0.5*((z)**2)
103 | 
104 | def mix_gaussian_loss(x, mu, log_sig, w):
105 |     '''
106 |     Combine the mixture of gaussian distribution and the loss into a single function
107 |     so that we can do the log sum exp trick for numerical stability...
108 |     '''
109 |     if K.backend() == "tensorflow":
110 |         x.set_shape([None, 1])
111 |     gauss = log_norm_pdf(K.repeat_elements(x=x, rep=mu.shape[1], axis=1), mu, log_sig)
112 |     # TODO: get rid of clipping.
113 |     gauss = K.clip(gauss, -40, 40)
114 |     max_gauss = K.maximum((0.), K.max(gauss))
115 |     # log sum exp trick...
116 |     gauss = gauss - max_gauss
117 |     out = K.sum(w * K.exp(gauss), axis=1)
118 |     loss = K.mean(-K.log(out) + max_gauss)
119 |     return loss
120 | 
121 | def mixture_of_gaussian_output(x, n_components):
122 |     mu = keras.layers.Dense(n_components, activation='linear')(x)
123 |     log_sig = keras.layers.Dense(n_components, activation='linear')(x)
124 |     pi = keras.layers.Dense(n_components, activation='softmax')(x)
125 |     return Concatenate(axis=1)([pi, mu, log_sig])
126 | 
127 | def mixture_of_gaussian_loss(y_true, y_pred, n_components):
128 |     pi, mu, log_sig = split_mixture_of_gaussians(y_pred, n_components)
129 |     return mix_gaussian_loss(y_true, mu, log_sig, pi)
130 | 
131 | ######### import DeepIV_True.samplers as samplers
132 | 
133 | def random_laplace(shape, mu=0., b=1.):
134 |     '''
135 |     Draw random samples from a Laplace distribution.
136 | 
137 |     See: https://en.wikipedia.org/wiki/Laplace_distribution#Generating_random_variables_according_to_the_Laplace_distribution
138 |     '''
139 |     U = K.random_uniform(shape, -0.5, 0.5)
140 |     return mu - b * K.sign(U) * K.log(1 - 2 * K.abs(U))
141 | 
142 | def random_normal(shape, mean=0.0, std=1.0):
143 |     return K.random_normal(shape, mean, std)
144 | 
145 | def random_multinomial(logits, seed=None):
146 |     '''
147 |     Backend-agnostic sampling from a multinomial with probabilities given by `logits`
148 |     '''
149 |     if K.backend() == "theano":
150 |         if seed is None:
151 |             seed = np.random.randint(1, 10e6)
152 |         rng = RandomStreams(seed=seed)
153 |         return rng.multinomial(n=1, pvals=logits, ndim=None, dtype=_FLOATX)
154 |     elif K.backend() == "tensorflow":
155 |         return tf.one_hot(tf.squeeze(tf.multinomial(K.log(logits), num_samples=1)),
156 |                           int(logits.shape[1]))
157 | 
158 | def random_gmm(pi, mu, sig):
159 |     '''
160 |     Sample from a gaussian mixture model. Returns one sample for each row in
161 |     the pi, mu and sig matrices... this is potentially wasteful (because you have to repeat
162 |     the matrices n times if you want to get n samples), but makes it easy to implement
163 |     code where the parameters vary as they are conditioned on different datapoints.
164 |     '''
165 |     normals = random_normal(K.shape(mu), mu, sig)
166 |     k = random_multinomial(pi)
167 |     return K.sum(normals * k, axis=1, keepdims=True)
168 | 
169 | 
170 | ######### from DeepIV_True.models import Treatment, Response
171 | 
172 | class Treatment(Model):
173 |     '''
174 |     Adds sampling functionality to a Keras model and extends the losses to support
175 |     mixture of gaussian losses.
176 | 
177 |     # Arguments: identical to keras.models.Model; sampling is configured via compile().
178 |     '''
179 | 
180 |     def _get_sampler_by_string(self, loss):
181 |         output = self.outputs[0]
182 |         inputs = self.inputs
183 | 
184 |         if loss in ["MSE", "mse", "mean_squared_error"]:
185 |             output += random_normal(K.shape(output), mean=0.0, std=1.0)
186 |             draw_sample = K.function(inputs + [K.learning_phase()], [output])
187 | 
188 |             def sample_gaussian(inputs, use_dropout=False):
189 |                 '''
190 |                 Helper to draw samples from a gaussian distribution
191 |                 '''
192 |                 return draw_sample(inputs + [int(use_dropout)])[0]
193 | 
194 |             return sample_gaussian
195 | 
196 |         elif loss == "binary_crossentropy":
197 |             output = K.random_binomial(K.shape(output), p=output)
198 |             draw_sample = K.function(inputs + [K.learning_phase()], [output])
199 | 
200 |             def sample_binomial(inputs, use_dropout=False):
201 |                 '''
202 |                 Helper to draw samples from a binomial distribution
203 |                 '''
204 |                 return draw_sample(inputs + [int(use_dropout)])[0]
205 | 
206 |             return sample_binomial
207 | 
208 |         elif loss in ["mean_absolute_error", "mae", "MAE"]:
209 |             output += random_laplace(K.shape(output), mu=0.0, b=1.0)
210 |             draw_sample = K.function(inputs + [K.learning_phase()], [output])
211 |             def sample_laplace(inputs, use_dropout=False):
212 |                 '''
213 |                 Helper to draw samples from a Laplacian distribution
214 |                 '''
215 |                 return draw_sample(inputs + [int(use_dropout)])[0]
216 | 
217 |             return sample_laplace
218 | 
219 |         elif loss == "mixture_of_gaussians":
220 |             pi, mu, log_sig = split_mixture_of_gaussians(output, self.n_components)
221 |             samples = random_gmm(pi, mu, K.exp(log_sig))
222 |             draw_sample = K.function(inputs + [K.learning_phase()], [samples])
223 |             return lambda inputs, use_dropout: draw_sample(inputs + [int(use_dropout)])[0]
224 | 
225 |         else:
226 |             raise NotImplementedError("Unrecognised loss: %s. Cannot build a generic sampler" % loss)
227 | 
228 |     def _prepare_sampler(self, loss):
229 |         '''
230 |         Build sampler
231 |         '''
232 |         if isinstance(loss, str):
233 |             self._sampler = self._get_sampler_by_string(loss)
234 |         else:
235 |             warnings.warn("You're using a custom loss function. Make sure you implement\
236 |                            the model's sample() function yourself.")
237 | 
238 |     def compile(self, optimizer, loss, metrics=None, loss_weights=None,
239 |                 sample_weight_mode=None, n_components=None, **kwargs):
240 |         '''
241 |         Overrides the existing keras compile function to add a sampler-building
242 |         step to the model compilation phase. Once compiled, one can draw samples
243 |         from the network using the sample() function; it also adds support for the
244 |         mixture of gaussians loss.
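        Example (sketch; mirrors the call made in DeepIV.fit below):

            treatment_model = Treatment(inputs=[instruments, features], outputs=est_treat)
            treatment_model.compile('adam', loss="mixture_of_gaussians", n_components=5)
            samples = treatment_model.sample([z, x], n_samples=10)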
245 | 
246 |         '''
247 |         if loss == "mixture_of_gaussians":
248 |             if n_components is None:
249 |                 raise Exception("When using mixture of gaussian loss you must\
250 |                                  supply n_components argument")
251 |             self.n_components = n_components
252 |             self._prepare_sampler(loss)
253 |             loss = lambda y_true, y_pred: mixture_of_gaussian_loss(y_true, y_pred, n_components)
254 |         else:
255 |             self._prepare_sampler(loss)
256 | 
257 |         super(Treatment, self).compile(optimizer, loss, metrics=metrics, loss_weights=loss_weights,
258 |                                        sample_weight_mode=sample_weight_mode, **kwargs)
259 | 
260 |     def sample(self, inputs, n_samples=1, use_dropout=False):
261 |         '''
262 |         Draw samples from the keras model.
263 |         '''
264 |         if hasattr(self, "_sampler"):
265 |             if not isinstance(inputs, list):
266 |                 inputs = [inputs]
267 |             inputs = [i.repeat(n_samples, axis=0) for i in inputs]
268 |             return self._sampler(inputs, use_dropout)
269 |         else:
270 |             raise Exception("Compile model with loss before sampling")
271 | 
272 | class Response(Model):
273 |     '''
274 |     Extends the Keras Model class to support sampling from the Treatment
275 |     model during training.
276 | 
277 |     Overwrites the existing fit_generator function.
278 | 
279 |     # Arguments
280 |     In addition to the standard model arguments, a Response object takes
281 |     a Treatment object as input so that it can sample from the fitted treatment
282 |     distribution during training.
283 |     '''
284 |     def __init__(self, treatment, **kwargs):
285 |         if isinstance(treatment, Treatment):
286 |             self.treatment = treatment
287 |         else:
288 |             raise TypeError("Expected a treatment model of type Treatment. \
289 |                              Got a model of type %s. Remember to train your\
290 |                              treatment model first." % type(treatment))
291 |         super(Response, self).__init__(**kwargs)
292 | 
293 |     def compile(self, optimizer, loss, metrics=None, loss_weights=None, sample_weight_mode=None,
294 |                 unbiased_gradient=False, n_samples=1, batch_size=None):
295 |         super(Response, self).compile(optimizer=optimizer, loss=loss, loss_weights=loss_weights,
296 |                                       sample_weight_mode=sample_weight_mode)
297 |         self.unbiased_gradient = unbiased_gradient
298 |         if unbiased_gradient:
299 |             if loss in ["MSE", "mse", "mean_squared_error"]:
300 |                 if batch_size is None:
301 |                     raise ValueError("Must supply a batch_size argument if using unbiased gradients. Currently batch_size is None.")
302 |                 replace_gradients_mse(self, optimizer, batch_size=batch_size, n_samples=n_samples)
303 |             else:
304 |                 warnings.warn("Unbiased gradient only implemented for mean square error loss. It is unnecessary for\
305 |                                logistic losses and currently not implemented for absolute error losses.")
306 | 
307 | 
308 |     def fit(self, x=None, y=None, batch_size=512, epochs=1, verbose=1, callbacks=None,
309 |             validation_data=None, class_weight=None, initial_epoch=0, samples_per_batch=None,
310 |             seed=None, observed_treatments=None):
311 |         '''
312 |         Trains the model by sampling from the fitted treatment distribution.
313 | 
314 |         # Arguments
315 |         x: list of numpy arrays. The first element should *always* be the instrument variables.
316 |         y: (numpy array). Target response variables.
317 |         The remainder of the arguments correspond to the Keras definitions.
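        Example (sketch; `z`, `x`, `y` are numpy arrays, as in DeepIV.fit below):

            response_model.compile('adam', loss='mse')
            response_model.fit([z, x], y, epochs=10, batch_size=100, samples_per_batch=2)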
318 |         '''
319 |         batch_size = np.minimum(y.shape[0], batch_size)
320 |         if seed is None:
321 |             seed = np.random.randint(0, 1e6)
322 |         if samples_per_batch is None:
323 |             if self.unbiased_gradient:
324 |                 samples_per_batch = 2
325 |             else:
326 |                 samples_per_batch = 1
327 | 
328 |         if observed_treatments is None:
329 |             generator = SampledSequence(x[1:], x[0], y, batch_size, self.treatment.sample, samples_per_batch)
330 |         else:
331 |             generator = OnesidedUnbaised(x[1:], x[0], y, observed_treatments, batch_size,
332 |                                          self.treatment.sample, samples_per_batch)
333 | 
334 |         steps_per_epoch = y.shape[0] // batch_size
335 |         super(Response, self).fit_generator(generator=generator,
336 |                                             steps_per_epoch=steps_per_epoch,
337 |                                             epochs=epochs, verbose=verbose,
338 |                                             callbacks=callbacks, validation_data=validation_data,
339 |                                             class_weight=class_weight, initial_epoch=initial_epoch)
340 | 
341 |     def fit_generator(self, **kwargs):
342 |         '''
343 |         We override fit_generator to support sampling from the treatment model during training.
344 | 
345 |         If you need this functionality, you'll need to build a generator that samples from the
346 |         treatment and performs whatever transformations you're performing. Please submit a pull
347 |         request if you implement this.
348 |         '''
349 |         raise NotImplementedError("We override fit_generator to support sampling from the\
350 |                                    treatment model during training.")
351 | 
352 |     def expected_representation(self, x, z, n_samples=100, batch_size=None, seed=None):
353 |         inputs = [z, x]
354 |         if not hasattr(self, "_E_representation"):
355 |             if batch_size is None:
356 |                 batch_size = inputs[0].shape[0]
357 |                 steps = 1
358 |             else:
359 |                 steps = inputs[0].shape[0] // batch_size
360 | 
361 |             intermediate_layer_model = Model(inputs=self.inputs,
362 |                                              outputs=self.layers[-2].output)
363 | 
364 |             def pred(inputs, n_samples=100, seed=None):
365 |                 features = inputs[1]
366 | 
367 |                 samples = self.treatment.sample(inputs, n_samples)
368 |                 batch_features = [features.repeat(n_samples, axis=0)] + [samples]
369 |                 representation = intermediate_layer_model.predict(batch_features)
370 |                 return representation.reshape((inputs[0].shape[0], n_samples, -1)).mean(axis=1)
371 |             self._E_representation = pred
372 |             return self._E_representation(inputs, n_samples, seed)
373 |         else:
374 |             return self._E_representation(inputs, n_samples, seed)
375 | 
376 |     def conditional_representation(self, x, p):
377 |         inputs = [x, p]
378 |         if not hasattr(self, "_c_representation"):
379 |             intermediate_layer_model = Model(inputs=self.inputs,
380 |                                              outputs=self.layers[-2].output)
381 | 
382 |             self._c_representation = intermediate_layer_model.predict
383 |             return self._c_representation(inputs)
384 |         else:
385 |             return self._c_representation(inputs)
386 | 
387 |     def dropout_predict(self, x, z, n_samples=100):
388 |         if isinstance(x, list):
389 |             inputs = [z] + x
390 |         else:
391 |             inputs = [z, x]
392 |         if not hasattr(self, "_dropout_predict"):
393 | 
394 |             predict_with_dropout = K.function(self.inputs + [K.learning_phase()],
395 |                                               [self.layers[-1].output])
396 | 
397 |             def pred(inputs, n_samples = 100):
398 |                 # draw samples from the treatment network with dropout turned on
399 |                 samples = self.treatment.sample(inputs, n_samples, use_dropout=True)
400 |                 # prepare inputs for the response network
401 |                 rep_inputs = [i.repeat(n_samples, axis=0) for i in inputs[1:]] + [samples]
402 |                 # return outputs from the response network with dropout turned on (learning_phase=1)
403 |                 return predict_with_dropout(rep_inputs + [1])[0]
404 | 
self._dropout_predict = pred 405 | return self._dropout_predict(inputs, n_samples) 406 | else: 407 | return self._dropout_predict(inputs, n_samples) 408 | 409 | def credible_interval(self, x, z, n_samples=100, p=0.95): 410 | ''' 411 | Return a credible interval of size p using dropout variational inference. 412 | ''' 413 | if isinstance(x, list): 414 | n = x[0].shape[0] 415 | else: 416 | n = x.shape[0] 417 | alpha = (1-p) / 2. 418 | samples = self.dropout_predict(x, z, n_samples).reshape((n, n_samples, -1)) 419 | upper = np.percentile(samples.copy(), 100*(p+alpha), axis=1) 420 | lower = np.percentile(samples.copy(), 100*(alpha), axis=1) 421 | return lower, upper 422 | 423 | def _add_constant(self, X): 424 | return np.concatenate((np.ones((X.shape[0], 1)), X), axis=1) 425 | 426 | def predict_confidence(self, x, p): 427 | if hasattr(self, "_predict_confidence"): 428 | return self._predict_confidence(x, p) 429 | else: 430 | raise Exception("Call fit_confidence_interval before running predict_confidence") 431 | 432 | 433 | def fit_confidence_interval(self, x_lo, z_lo, p_lo, y_lo, n_samples=100, alpha=0.): 434 | eta_bar = self.expected_representation(x=x_lo, z=z_lo, n_samples=n_samples) 435 | pca = PCA(1-1e-16, svd_solver="full", whiten=True) 436 | pca.fit(eta_bar) 437 | 438 | eta_bar = pca.transform(eta_bar) 439 | eta_lo_prime = pca.transform(self.conditional_representation(x_lo, p_lo)) 440 | eta_lo = self._add_constant(eta_lo_prime) 441 | 442 | ols1 = linear_model.Ridge(alpha=alpha, fit_intercept=True) 443 | ols1.fit(eta_bar, eta_lo_prime) 444 | hhat = ols1.predict(eta_bar) 445 | ols2 = linear_model.Ridge(alpha=alpha, fit_intercept=False) 446 | ols2.fit(self._add_constant(hhat), y_lo) 447 | 448 | yhat = ols2.predict(eta_lo) 449 | hhi = np.linalg.inv(np.dot(eta_lo.T, eta_lo)) 450 | 451 | heh = np.dot(eta_lo.T, np.square(y_lo - yhat) * eta_lo) 452 | V = np.dot(np.dot(hhi, heh), hhi) 453 | 454 | def pred(xx, pp): 455 | H = self._add_constant(pca.transform(self.conditional_representation(xx,pp))) 456 | sdhb = np.sqrt(np.diag(np.dot(np.dot(H, V), H.T))) 457 | hb = ols2.predict(H).flatten() 458 | return hb, sdhb 459 | 460 | self._predict_confidence = pred 461 | 462 | class SampledSequence(keras.utils.Sequence): 463 | def __init__(self, features, instruments, outputs, batch_size, sampler, n_samples=1, seed=None): 464 | self.rng = np.random.RandomState(seed) 465 | if not isinstance(features, list): 466 | features = [features.copy()] 467 | else: 468 | features = [f.copy() for f in features] 469 | self.features = features 470 | self.instruments = instruments.copy() 471 | self.outputs = outputs.copy() 472 | if batch_size < self.instruments.shape[0]: 473 | self.batch_size = batch_size 474 | else: 475 | self.batch_size = self.instruments.shape[0] 476 | self.sampler = sampler 477 | self.n_samples = n_samples 478 | self.current_index = 0 479 | self.shuffle() 480 | 481 | def __len__(self): 482 | if isinstance(self.outputs, list): 483 | return self.outputs[0].shape[0] // self.batch_size 484 | else: 485 | return self.outputs.shape[0] // self.batch_size 486 | 487 | def shuffle(self): 488 | idx = self.rng.permutation(np.arange(self.instruments.shape[0])) 489 | self.instruments = self.instruments[idx,:] 490 | self.outputs = self.outputs[idx,:] 491 | self.features = [f[idx,:] for f in self.features] 492 | 493 | def __getitem__(self,idx): 494 | instruments = [self.instruments[idx*self.batch_size:(idx+1)*self.batch_size, :]] 495 | features = [inp[idx*self.batch_size:(idx+1)*self.batch_size, :] for inp in 
self.features] 496 | sampler_input = instruments + features 497 | samples = self.sampler(sampler_input, self.n_samples) 498 | batch_features = [f[idx*self.batch_size:(idx+1)*self.batch_size].repeat(self.n_samples, axis=0) for f in self.features] + [samples] 499 | batch_y = self.outputs[idx*self.batch_size:(idx+1)*self.batch_size].repeat(self.n_samples, axis=0) 500 | if idx == (len(self) - 1): 501 | self.shuffle() 502 | return batch_features, batch_y 503 | 504 | class OnesidedUnbaised(SampledSequence): 505 | def __init__(self, features, instruments, outputs, treatments, batch_size, sampler, n_samples=1, seed=None): 506 | self.rng = np.random.RandomState(seed) 507 | if not isinstance(features, list): 508 | features = [features.copy()] 509 | else: 510 | features = [f.copy() for f in features] 511 | self.features = features 512 | self.instruments = instruments.copy() 513 | self.outputs = outputs.copy() 514 | self.treatments = treatments.copy() 515 | self.batch_size = batch_size 516 | self.sampler = sampler 517 | self.n_samples = n_samples 518 | self.current_index = 0 519 | self.shuffle() 520 | 521 | def shuffle(self): 522 | idx = self.rng.permutation(np.arange(self.instruments.shape[0])) 523 | self.instruments = self.instruments[idx,:] 524 | self.outputs = self.outputs[idx,:] 525 | self.features = [f[idx,:] for f in self.features] 526 | self.treatments = self.treatments[idx,:] 527 | 528 | def __getitem__(self, idx): 529 | instruments = [self.instruments[idx*self.batch_size:(idx+1)*self.batch_size, :]] 530 | features = [inp[idx*self.batch_size:(idx+1)*self.batch_size, :] for inp in self.features] 531 | observed_treatments = self.treatments[idx*self.batch_size:(idx+1)*self.batch_size, :] 532 | sampler_input = instruments + features 533 | samples = self.sampler(sampler_input, self.n_samples // 2) 534 | samples = np.concatenate([observed_treatments, samples], axis=0) 535 | batch_features = [f[idx*self.batch_size:(idx+1)*self.batch_size].repeat(self.n_samples, axis=0) for f in self.features] + [samples] 536 | batch_y = self.outputs[idx*self.batch_size:(idx+1)*self.batch_size].repeat(self.n_samples, axis=0) 537 | if idx == (len(self) - 1): 538 | self.shuffle() 539 | return batch_features, batch_y 540 | 541 | ######### import DeepIV_True.architectures as architectures 542 | #new# 543 | def binary_crossentropy_output(x): 544 | pi = keras.layers.Dense(1, activation='softmax')(x) 545 | return pi 546 | 547 | def mixture_of_gaussian_output(x, n_components): 548 | mu = keras.layers.Dense(n_components, activation='linear')(x) 549 | log_sig = keras.layers.Dense(n_components, activation='linear')(x) 550 | pi = keras.layers.Dense(n_components, activation='softmax')(x) 551 | return Concatenate(axis=1)([pi, mu, log_sig]) 552 | 553 | def feed_forward_net(input, output, hidden_layers=[64, 64], activations='relu', 554 | dropout_rate=0., l2=0., constrain_norm=False): 555 | ''' 556 | Helper function for building a Keras feed forward network. 557 | 558 | input: Keras Input object appropriate for the data. e.g. input=Input(shape=(20,)) 559 | output: Function representing final layer for the network that maps from the last 560 | hidden layer to output. 561 | e.g. if output = Dense(10, activation='softmax') if we're doing 10 class 562 | classification or output = Dense(1, activation='linear') if we're doing 563 | regression. 
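    Example (sketch, mirroring the response network built in DeepIV.fit below):

        features = Input(shape=(3,))
        net = feed_forward_net(features, Dense(1),
                               hidden_layers=[128, 64, 32],
                               activations='relu', dropout_rate=0.5)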
564 | ''' 565 | state = input 566 | if isinstance(activations, str): 567 | activations = [activations] * len(hidden_layers) 568 | 569 | for h, a in zip(hidden_layers, activations): 570 | if l2 > 0.: 571 | w_reg = keras.regularizers.l2(l2) 572 | else: 573 | w_reg = None 574 | const = maxnorm(2) if constrain_norm else None 575 | state = Dense(h, activation=a, kernel_regularizer=w_reg, kernel_constraint=const)(state) 576 | if dropout_rate > 0.: 577 | state = Dropout(dropout_rate)(state) 578 | return output(state) 579 | 580 | class DeepIV(object): 581 | def __init__(self) -> None: 582 | self.config = { 583 | 'methodName': 'DeepIV', 584 | 'dropout': 0.5, 585 | 'epochs': 10, 586 | 'batch_size': 100, 587 | 'n_components': 5, 588 | 'layers': [128, 64, 32], 589 | 'activation': 'relu', 590 | 'samples_per_batch': 2, 591 | 't_loss': 'mixture_of_gaussians', 592 | 'y_loss': 'mse', 593 | 'seed': 2022, 594 | } 595 | 596 | def set_Configuration(self, config): 597 | self.config = config 598 | 599 | def fit(self, data, exp=-1, config=None): 600 | if config is None: 601 | config = self.config 602 | 603 | set_seed(config['seed']) 604 | clear_session() 605 | tf.reset_default_graph() 606 | random.seed(config['seed']) 607 | tf.compat.v1.set_random_seed(config['seed']) 608 | np.random.seed(config['seed']) 609 | data.numpy() 610 | 611 | config['num'] = data.train.length 612 | 613 | tfconfig = tf.ConfigProto() 614 | tfconfig.gpu_options.allow_growth=True 615 | sess = tf.Session(config=tfconfig) 616 | K.set_session(sess) 617 | 618 | dropout_rate = min(1000./(1000. + config['num']), config['dropout']) 619 | epochs = min(int(1000000./float(config['num'])), config['epochs']) 620 | 621 | instruments = Input(shape=(data.train.z.shape[1],), name="instruments") 622 | features = Input(shape=(data.train.x.shape[1],), name="features") 623 | treatment_input = Concatenate(axis=1)([instruments, features]) 624 | 625 | est_treat = feed_forward_net(treatment_input, lambda x: mixture_of_gaussian_output(x, config['n_components']), 626 | hidden_layers=config['layers'], 627 | dropout_rate=dropout_rate, l2=0.0001, 628 | activations=config['activation']) 629 | 630 | 631 | treatment_model = Treatment(inputs=[instruments, features], outputs=est_treat) 632 | treatment_model.compile('adam', loss=config['t_loss'], n_components=config['n_components']) 633 | 634 | treatment_model.fit([data.train.z, data.train.x], data.train.t, epochs=epochs, batch_size=config['batch_size']) 635 | 636 | treatment = Input(shape=(data.train.t.shape[1],), name="treatment") 637 | response_input = Concatenate(axis=1)([features, treatment]) 638 | 639 | est_response = feed_forward_net(response_input, Dense(1), 640 | activations=config['activation'], 641 | hidden_layers=config['layers'], 642 | l2=0.001, 643 | dropout_rate=dropout_rate) 644 | 645 | response_model = Response(treatment=treatment_model, 646 | inputs=[features, treatment], 647 | outputs=est_response) 648 | response_model.compile('adam', loss=config['y_loss']) 649 | 650 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 651 | 652 | response_model.fit([data.train.z, data.train.x], data.train.y, epochs=epochs, verbose=1, 653 | batch_size=config['batch_size'], samples_per_batch=config['samples_per_batch']) 654 | 655 | def estimation(data): 656 | return response_model.predict([data.x, data.t-data.t]), response_model.predict([data.x, data.t]) 657 | 658 | print('End. 
' + '-'*20) 659 | 660 | self.estimation = estimation 661 | self.response_model = response_model 662 | 663 | def predict(self, data=None, t=None, x=None): 664 | if data is None: 665 | data = self.data.test 666 | 667 | if x is None: 668 | x = data.x 669 | 670 | if t is None: 671 | t = data.t 672 | 673 | return self.response_model.predict([x, t]) 674 | 675 | def ITE(self, data=None, t=None, x=None): 676 | if data is None: 677 | data = self.data.test 678 | 679 | if x is None: 680 | x = data.x 681 | 682 | if t is None: 683 | t = data.t 684 | 685 | ITE_0 = self.response_model.predict([x, t-t]) 686 | ITE_1 = self.response_model.predict([x, t-t+1]) 687 | ITE_t = self.response_model.predict([x, t]) 688 | 689 | return ITE_0,ITE_1,ITE_t 690 | 691 | def ATE(self, data=None, t=None, x=None): 692 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 693 | 694 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 695 | 696 | -------------------------------------------------------------------------------- /mliv/inference/deep/dfiv_v1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import numpy as np 4 | from mliv.utils import set_seed, cat, split 5 | 6 | example = ''' 7 | from mliv.inference import DFIV 8 | 9 | model = DFIV() 10 | model.fit(data) 11 | ITE = model.predict(data.train) 12 | ATE,_ = model.ATE(data.train) 13 | ''' 14 | 15 | ############# Define Networks ################ 16 | def build_net(t_input_dim, z_input_dim, x_input_dim): 17 | treatment_net = nn.Sequential(nn.Linear(t_input_dim, 16), 18 | nn.ReLU(), 19 | nn.Linear(16, 1)) 20 | 21 | instrumental_net = nn.Sequential(nn.Linear(z_input_dim+x_input_dim, 128), 22 | nn.ReLU(), 23 | nn.Linear(128, 64), 24 | nn.ReLU(), 25 | nn.Linear(64, 32), 26 | nn.BatchNorm1d(32)) 27 | 28 | covariate_net = nn.Sequential(nn.Linear(x_input_dim, 128), 29 | nn.ReLU(), 30 | nn.Linear(128, 32), 31 | nn.BatchNorm1d(32), 32 | nn.ReLU(), 33 | nn.Linear(32, 16), 34 | nn.ReLU()) 35 | 36 | return treatment_net, instrumental_net, covariate_net 37 | 38 | ################## Define loss ################################# 39 | def fit_linear(target: torch.Tensor, feature: torch.Tensor, reg: float = 0.0): 40 | nData, nDim = feature.size() 41 | A = torch.matmul(feature.t(), feature) 42 | device = feature.device 43 | A = A + reg * torch.eye(nDim, device=device) 44 | A_inv = torch.inverse(A) 45 | if target.dim() == 2: 46 | b = torch.matmul(feature.t(), target) 47 | weight = torch.matmul(A_inv, b) 48 | else: 49 | b = torch.einsum("nd,n...->d...", feature, target) 50 | weight = torch.einsum("de,d...->e...", A_inv, b) 51 | return weight 52 | 53 | def linear_reg_pred(feature: torch.Tensor, weight: torch.Tensor): 54 | if weight.dim() == 2: 55 | return torch.matmul(feature, weight) 56 | else: 57 | return torch.einsum("nd,d...->n...", feature, weight) 58 | 59 | def linear_reg_loss(target: torch.Tensor, feature: torch.Tensor, reg: float): 60 | weight = fit_linear(target, feature, reg) 61 | pred = linear_reg_pred(feature, weight) 62 | return torch.norm((target - pred)) ** 2 + reg * torch.norm(weight) ** 2 63 | 64 | ############ Define Utils ##################### 65 | def add_const_col(mat: torch.Tensor): 66 | n_data = mat.size()[0] 67 | device = mat.device 68 | return torch.cat([mat, torch.ones((n_data, 1), device=device)], dim=1) 69 | 70 | def augment_z_feature(feature, add_intercept): 71 | if add_intercept: feature = add_const_col(feature) 72 | return feature 73 | 74 | def augment_tx_feature(feature, feature_tmp, 
add_intercept): 75 | if add_intercept: feature = add_const_col(feature) 76 | if add_intercept: feature_tmp = add_const_col(feature_tmp) 77 | feature = outer_prod(feature, feature_tmp) 78 | feature = torch.flatten(feature, start_dim=1) 79 | return feature 80 | 81 | def outer_prod(mat1: torch.Tensor, mat2: torch.Tensor): 82 | mat1_shape = tuple(mat1.size()) 83 | mat2_shape = tuple(mat2.size()) 84 | assert mat1_shape[0] == mat2_shape[0] 85 | nData = mat1_shape[0] 86 | aug_mat1_shape = mat1_shape + (1,) * (len(mat2_shape) - 1) 87 | aug_mat1 = torch.reshape(mat1, aug_mat1_shape) 88 | aug_mat2_shape = (nData,) + (1,) * (len(mat1_shape) - 1) + mat2_shape[1:] 89 | aug_mat2 = torch.reshape(mat2, aug_mat2_shape) 90 | return aug_mat1 * aug_mat2 91 | 92 | class DFIVTrainer(object): 93 | 94 | def __init__(self, data, train_dict): 95 | 96 | device = train_dict['device'] 97 | device = device if train_dict['GPU'] and torch.cuda.is_available() else "cpu" 98 | self.device = device 99 | 100 | data.tensor() 101 | data.to(device) 102 | data.split(train_dict["split_ratio"]) 103 | self.data = data 104 | 105 | self.t_loss = train_dict['t_loss'] 106 | self.y_loss = train_dict['y_loss'] 107 | self.gpu_flg = train_dict['GPU'] and torch.cuda.is_available() 108 | self.add_intercept = train_dict['intercept'] 109 | self.n_epoch = train_dict["epochs"] 110 | self.lam1 = train_dict["lam1"] 111 | self.lam2 = train_dict["lam2"] 112 | self.stage1_iter = train_dict["stage1_iter"] 113 | self.stage2_iter = train_dict["stage2_iter"] 114 | self.covariate_iter = train_dict["covariate_iter"] 115 | self.split_ratio = train_dict["split_ratio"] 116 | self.treatment_weight_decay = train_dict["treatment_weight_decay"] 117 | self.instrumental_weight_decay = train_dict["instrumental_weight_decay"] 118 | self.covariate_weight_decay = train_dict["covariate_weight_decay"] 119 | self.verbose = train_dict["verbose"] 120 | self.show_per_epoch = train_dict["show_per_epoch"] 121 | 122 | self.treatment_net, self.instrumental_net, self.covariate_net = build_net(train_dict['t_dim'], train_dict['z_dim'], train_dict['x_dim']) 123 | if self.gpu_flg: 124 | self.treatment_net.to(device) 125 | self.instrumental_net.to(device) 126 | self.covariate_net.to(device) 127 | self.treatment_opt = torch.optim.Adam(self.treatment_net.parameters(),weight_decay=self.treatment_weight_decay) 128 | self.instrumental_opt = torch.optim.Adam(self.instrumental_net.parameters(),weight_decay=self.instrumental_weight_decay) 129 | self.covariate_opt = torch.optim.Adam(self.covariate_net.parameters(),weight_decay=self.covariate_weight_decay) 130 | 131 | def train(self, verbose=None, show_per_epoch=None): 132 | if verbose is None or show_per_epoch is None: 133 | verbose, show_per_epoch = self.verbose, self.show_per_epoch 134 | 135 | self.lam1 *= self.data.data1.length 136 | self.lam2 *= self.data.data2.length 137 | 138 | for exp in range(self.n_epoch): 139 | self.stage1_update(self.data.data1, verbose) 140 | self.covariate_update(self.data.data1, self.data.data2, verbose) 141 | self.stage2_update(self.data.data1, self.data.data2, verbose) 142 | if exp % show_per_epoch == 0 or exp == self.n_epoch - 1: 143 | if verbose >= 1: 144 | pred_0x2y, pred_tx2y = self.estimation4tx(self.data.valid) 145 | mse_y = ((pred_tx2y - self.data.valid.y) ** 2).mean() 146 | mse_g = ((pred_tx2y - self.data.valid.g) ** 2).mean() 147 | print(f"Epoch {exp} ended: {mse_y}, {mse_g}. 
") 148 | 149 | def stage1_update(self, train_1st, verbose): 150 | self.instrumental_net.train(True) 151 | self.treatment_net.train(False) 152 | self.covariate_net.train(False) 153 | 154 | treatment_feature = self.treatment_net(train_1st.t).detach() 155 | for i in range(self.stage1_iter): 156 | self.instrumental_opt.zero_grad() 157 | instrumental_feature = self.instrumental_net(cat([train_1st.z,train_1st.x])) 158 | feature = augment_z_feature(instrumental_feature, self.add_intercept) 159 | loss = linear_reg_loss(treatment_feature, feature, self.lam1) 160 | loss.backward() 161 | if verbose >= 2: print(f"stage1 learning: {loss.item()}") 162 | self.instrumental_opt.step() 163 | 164 | def covariate_update(self, train_1st, train_2nd, verbose): 165 | self.instrumental_net.train(False) 166 | self.treatment_net.train(False) 167 | self.covariate_net.train(True) 168 | 169 | instrumental_1st_feature = self.instrumental_net(cat([train_1st.z,train_1st.x])).detach() 170 | instrumental_2nd_feature = self.instrumental_net(cat([train_2nd.z,train_2nd.x])).detach() 171 | treatment_1st_feature = self.treatment_net(train_1st.t).detach() 172 | 173 | feature_1st = augment_z_feature(instrumental_1st_feature, self.add_intercept) 174 | feature_2nd = augment_z_feature(instrumental_2nd_feature, self.add_intercept) 175 | self.stage1_weight = fit_linear(treatment_1st_feature, feature_1st, self.lam1) 176 | predicted_treatment_feature_2nd = linear_reg_pred(feature_2nd, self.stage1_weight).detach() 177 | 178 | for i in range(self.covariate_iter): 179 | self.covariate_opt.zero_grad() 180 | covariate_feature = self.covariate_net(train_2nd.x) 181 | feature = augment_tx_feature(predicted_treatment_feature_2nd, covariate_feature, self.add_intercept) 182 | loss = linear_reg_loss(train_2nd.y, feature, self.lam2) 183 | loss.backward() 184 | if verbose >= 2: print(f"update covariate: {loss.item()}") 185 | self.covariate_opt.step() 186 | 187 | def stage2_update(self, train_1st, train_2nd, verbose): 188 | self.instrumental_net.train(False) 189 | self.treatment_net.train(True) 190 | self.covariate_net.train(False) 191 | 192 | instrumental_1st_feature = self.instrumental_net(cat([train_1st.z,train_1st.x])).detach() 193 | instrumental_2nd_feature = self.instrumental_net(cat([train_2nd.z,train_2nd.x])).detach() 194 | covariate_2nd_feature = self.covariate_net(train_2nd.x).detach() 195 | 196 | for i in range(self.stage2_iter): 197 | self.treatment_opt.zero_grad() 198 | treatment_1st_feature = self.treatment_net(train_1st.t) 199 | 200 | feature_1st = augment_z_feature(instrumental_1st_feature, self.add_intercept) 201 | feature_2nd = augment_z_feature(instrumental_2nd_feature, self.add_intercept) 202 | self.stage1_weight = fit_linear(treatment_1st_feature, feature_1st, self.lam1) 203 | predicted_treatment_feature = linear_reg_pred(feature_2nd, self.stage1_weight) 204 | 205 | feature = augment_tx_feature(predicted_treatment_feature, covariate_2nd_feature, self.add_intercept) 206 | self.stage2_weight = fit_linear(train_2nd.y, feature, self.lam2) 207 | pred = linear_reg_pred(feature, self.stage2_weight) 208 | loss = torch.norm((train_2nd.y - pred)) ** 2 + self.lam2 * torch.norm(self.stage2_weight) ** 2 209 | 210 | loss.backward() 211 | if verbose >= 2: print(f"stage2 learning: {loss.item()}") 212 | self.treatment_opt.step() 213 | 214 | def estimation4tx(self, data, update_weight1=False, update_weight2=False): 215 | self.instrumental_net.train(False) 216 | self.treatment_net.train(False) 217 | self.covariate_net.train(False) 218 | 219 | 
instrumental_feature = self.instrumental_net(cat([data.z,data.x])).detach() 220 | treatment_feature = self.treatment_net(data.t).detach() 221 | treatment_feature_0 = self.treatment_net(data.t-data.t).detach() 222 | covariate_feature = self.covariate_net(data.x).detach() 223 | 224 | feature_stage1 = augment_z_feature(instrumental_feature, self.add_intercept) 225 | if update_weight1: self.stage1_weight = fit_linear(treatment_feature, feature_stage1, self.lam1) 226 | predicted_treatment_feature = linear_reg_pred(feature_stage1, self.stage1_weight) 227 | 228 | feature_stage2_tx2y = augment_tx_feature(treatment_feature, covariate_feature, self.add_intercept) 229 | if update_weight2: self.stage2_weight = fit_linear(data.y, feature_stage2_tx2y, self.lam2) 230 | pred_tx2y = linear_reg_pred(feature_stage2_tx2y, self.stage2_weight) 231 | 232 | feature_stage2_0x2y = augment_tx_feature(treatment_feature_0, covariate_feature, self.add_intercept) 233 | if update_weight2: self.stage2_weight = fit_linear(data.y, feature_stage2_0x2y, self.lam2) 234 | pred_0x2y = linear_reg_pred(feature_stage2_0x2y, self.stage2_weight) 235 | 236 | return pred_0x2y, pred_tx2y 237 | 238 | def estimation4zx(self, data, update_weight1=False, update_weight2=False): 239 | self.instrumental_net.train(False) 240 | self.treatment_net.train(False) 241 | self.covariate_net.train(False) 242 | 243 | instrumental_feature = self.instrumental_net(cat([data.z,data.x])).detach() 244 | treatment_feature = self.treatment_net(data.t).detach() 245 | covariate_feature = self.covariate_net(data.x).detach() 246 | 247 | feature_stage1 = augment_z_feature(instrumental_feature, self.add_intercept) 248 | if update_weight1: self.stage1_weight = fit_linear(treatment_feature, feature_stage1, self.lam1) 249 | predicted_treatment_feature = linear_reg_pred(feature_stage1, self.stage1_weight) 250 | 251 | feature_stage2_zx2y = augment_tx_feature(predicted_treatment_feature, covariate_feature, self.add_intercept) 252 | if update_weight2: self.stage2_weight = fit_linear(data.y, feature_stage2_zx2y, self.lam2) 253 | pred_zx2y = linear_reg_pred(feature_stage2_zx2y, self.stage2_weight) 254 | 255 | return pred_zx2y 256 | 257 | class DFIV(object): 258 | def __init__(self) -> None: 259 | self.config = { 260 | 'methodName': 'DFIV', 261 | 't_loss': 'mse', 262 | 'y_loss': 'mse', 263 | 'device': 'cuda:0', 264 | 'GPU': True, 265 | 'intercept': True, 266 | "epochs": 100, 267 | 'lam1': 0.1, 268 | 'lam2': 0.1, 269 | 'stage1_iter': 20, 270 | 'stage2_iter': 1, 271 | 'covariate_iter': 20, 272 | 'split_ratio': 0.5, 273 | 'treatment_weight_decay': 0.0, 274 | 'instrumental_weight_decay': 0.0, 275 | 'covariate_weight_decay': 0.1, 276 | 'verbose': 1, 277 | 'show_per_epoch': 20, 278 | 'seed': 2022, 279 | } 280 | 281 | def set_Configuration(self, config): 282 | self.config = config 283 | 284 | def fit(self, data, exp=-1, config=None): 285 | if config is None: 286 | config = self.config 287 | 288 | set_seed(config['seed']) 289 | data.numpy() 290 | 291 | self.z_dim = data.train.z.shape[1] 292 | self.x_dim = data.train.x.shape[1] 293 | self.t_dim = data.train.t.shape[1] 294 | 295 | config['z_dim'] = self.z_dim 296 | config['x_dim'] = self.x_dim 297 | config['t_dim'] = self.t_dim 298 | 299 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 300 | 301 | trainer = DFIVTrainer(data, config) 302 | trainer.train() 303 | 304 | print('End. 
' + '-'*20) 305 | 306 | self.estimation = trainer.estimation4tx 307 | self.nets = trainer 308 | 309 | def predict(self, data=None, t=None, x=None): 310 | if data is None: 311 | data = self.data.test 312 | 313 | if x is None: 314 | x = data.x 315 | 316 | if t is None: 317 | t = data.t 318 | 319 | with torch.no_grad(): 320 | treatment_feature = self.nets.treatment_net(t).detach() 321 | covariate_feature = self.nets.covariate_net(x).detach() 322 | feature_stage2_tx2y = augment_tx_feature(treatment_feature, covariate_feature, self.nets.add_intercept) 323 | pred_tx2y = linear_reg_pred(feature_stage2_tx2y, self.nets.stage2_weight).detach().cpu().numpy() 324 | 325 | return pred_tx2y 326 | 327 | def ITE(self, data=None, t=None, x=None): 328 | if data is None: 329 | data = self.data.test 330 | 331 | if x is None: 332 | x = data.x 333 | 334 | if t is None: 335 | t = data.t 336 | 337 | with torch.no_grad(): 338 | feature_0 = self.nets.treatment_net(t-t).detach() 339 | feature_1 = self.nets.treatment_net(t-t+1).detach() 340 | feature_t = self.nets.treatment_net(t).detach() 341 | x_feature = self.nets.covariate_net(x).detach() 342 | 343 | feature_0x = augment_tx_feature(feature_0, x_feature, self.nets.add_intercept) 344 | feature_1x = augment_tx_feature(feature_1, x_feature, self.nets.add_intercept) 345 | feature_tx = augment_tx_feature(feature_t, x_feature, self.nets.add_intercept) 346 | 347 | ITE_0 = linear_reg_pred(feature_0x, self.nets.stage2_weight).detach().cpu().numpy() 348 | ITE_1 = linear_reg_pred(feature_1x, self.nets.stage2_weight).detach().cpu().numpy() 349 | ITE_t = linear_reg_pred(feature_tx, self.nets.stage2_weight).detach().cpu().numpy() 350 | 351 | return ITE_0,ITE_1,ITE_t 352 | 353 | def ATE(self, data=None, t=None, x=None): 354 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 355 | 356 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 357 | -------------------------------------------------------------------------------- /mliv/inference/dflearning/__init__.py: -------------------------------------------------------------------------------- 1 | from .dfl_v1 import DFL -------------------------------------------------------------------------------- /mliv/inference/dflearning/dfl_v1.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List, NamedTuple, TYPE_CHECKING, Optional 2 | import torch 3 | from torch import nn 4 | import numpy as np 5 | from pathlib import Path 6 | from mliv.utils import set_seed 7 | 8 | example = ''' 9 | from mliv.inference import DFL 10 | 11 | model = DFL() 12 | model.fit(data) 13 | ITE = model.predict(data.train) 14 | ATE,_ = model.ATE(data.train) 15 | ''' 16 | 17 | ############ from .data_class import TrainDataSet, TestDataSet, TrainDataSetTorch, TestDataSetTorch 18 | class TrainDataSet(NamedTuple): 19 | treatment: np.ndarray 20 | instrumental: np.ndarray 21 | covariate: Optional[np.ndarray] 22 | outcome: np.ndarray 23 | structural: np.ndarray 24 | 25 | class TestDataSet(NamedTuple): 26 | treatment: np.ndarray 27 | covariate: Optional[np.ndarray] 28 | structural: np.ndarray 29 | instrumental: Optional[np.ndarray] 30 | outcome: Optional[np.ndarray] 31 | 32 | class TrainDataSetTorch(NamedTuple): 33 | treatment: torch.Tensor 34 | instrumental: torch.Tensor 35 | covariate: torch.Tensor 36 | outcome: torch.Tensor 37 | structural: torch.Tensor 38 | 39 | @classmethod 40 | def from_numpy(cls, train_data: TrainDataSet): 41 | covariate = None 42 | if train_data.covariate is not None: 43 | covariate = 
torch.tensor(train_data.covariate, dtype=torch.float32) 44 | return TrainDataSetTorch(treatment=torch.tensor(train_data.treatment, dtype=torch.float32), 45 | instrumental=torch.tensor(train_data.instrumental, dtype=torch.float32), 46 | covariate=covariate, 47 | outcome=torch.tensor(train_data.outcome, dtype=torch.float32), 48 | structural=torch.tensor(train_data.structural, dtype=torch.float32)) 49 | 50 | def to(self, device): 51 | covariate = None 52 | if self.covariate is not None: 53 | covariate = self.covariate.to(device) 54 | return TrainDataSetTorch(treatment=self.treatment.to(device), 55 | instrumental=self.instrumental.to(device), 56 | covariate=covariate, 57 | outcome=self.outcome.to(device), 58 | structural=self.structural.to(device)) 59 | 60 | class TestDataSetTorch(NamedTuple): 61 | treatment: torch.Tensor 62 | instrumental: torch.Tensor 63 | covariate: torch.Tensor 64 | outcome: torch.Tensor 65 | structural: torch.Tensor 66 | 67 | @classmethod 68 | def from_numpy(cls, test_data: TestDataSet): 69 | covariate = None 70 | instrumental = None 71 | outcome = None 72 | if test_data.covariate is not None: 73 | covariate = torch.tensor(test_data.covariate, dtype=torch.float32) 74 | if test_data.instrumental is not None: 75 | instrumental = torch.tensor(test_data.instrumental, dtype=torch.float32) 76 | if test_data.outcome is not None: 77 | outcome = torch.tensor(test_data.outcome, dtype=torch.float32) 78 | return TestDataSetTorch(treatment=torch.tensor(test_data.treatment, dtype=torch.float32), 79 | covariate=covariate, 80 | instrumental=instrumental, 81 | outcome=outcome, 82 | structural=torch.tensor(test_data.structural, dtype=torch.float32)) 83 | def to(self, device): 84 | covariate = None 85 | instrumental = None 86 | outcome = None 87 | if self.covariate is not None: 88 | covariate = self.covariate.to(device) 89 | if self.instrumental is not None: 90 | instrumental = self.instrumental.to(device) 91 | if self.outcome is not None: 92 | outcome = self.outcome.to(device) 93 | return TestDataSetTorch(treatment=self.treatment.to(device), 94 | covariate=covariate, 95 | instrumental=instrumental, 96 | outcome=outcome, 97 | structural=self.structural.to(device)) 98 | 99 | ############ from .utils.pytorch_linear_reg_utils import linear_reg_loss, fit_linear, linear_reg_pred, outer_prod, add_const_col 100 | def inv_logit_np(x): 101 | return np.log(x / (1-x)) 102 | 103 | def logit_np(x): 104 | return 1 / (1 + np.exp(-x)) 105 | 106 | def inv_logit(x): 107 | return torch.log(x / (1-x)) 108 | 109 | def logit(x): 110 | return 1 / (1 + torch.exp(-x)) 111 | 112 | def linear_log_loss(target: torch.Tensor, 113 | feature: torch.Tensor, 114 | reg: float): 115 | weight = fit_linear(target, feature, reg) 116 | pred = linear_reg_pred(feature, weight) 117 | 118 | labels = logit(target) 119 | logits = logit(pred) 120 | return (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() + reg * torch.norm(weight) ** 2 121 | 122 | def linear_reg_loss(target: torch.Tensor, 123 | feature: torch.Tensor, 124 | reg: float): 125 | weight = fit_linear(target, feature, reg) 126 | pred = linear_reg_pred(feature, weight) 127 | return torch.norm((target - pred)) ** 2 + reg * torch.norm(weight) ** 2 128 | 129 | def fit_linear(target: torch.Tensor, 130 | feature: torch.Tensor, 131 | reg: float = 0.0): 132 | assert feature.dim() == 2 133 | assert target.dim() >= 2 134 | nData, nDim = feature.size() 135 | A = torch.matmul(feature.t(), feature) 136 | device = feature.device 137 | A = A + reg * torch.eye(nDim, 
device=device) 138 | # U = torch.cholesky(A) 139 | # A_inv = torch.cholesky_inverse(U) 140 | #TODO use cholesky version in the latest pytorch 141 | A_inv = torch.inverse(A) 142 | if target.dim() == 2: 143 | b = torch.matmul(feature.t(), target) 144 | weight = torch.matmul(A_inv, b) 145 | else: 146 | b = torch.einsum("nd,n...->d...", feature, target) 147 | weight = torch.einsum("de,d...->e...", A_inv, b) 148 | return weight 149 | 150 | def linear_reg_pred(feature: torch.Tensor, weight: torch.Tensor): 151 | assert weight.dim() >= 2 152 | if weight.dim() == 2: 153 | return torch.matmul(feature, weight) 154 | else: 155 | return torch.einsum("nd,d...->n...", feature, weight) 156 | 157 | def outer_prod(mat1: torch.Tensor, mat2: torch.Tensor): 158 | mat1_shape = tuple(mat1.size()) 159 | mat2_shape = tuple(mat2.size()) 160 | assert mat1_shape[0] == mat2_shape[0] 161 | nData = mat1_shape[0] 162 | aug_mat1_shape = mat1_shape + (1,) * (len(mat2_shape) - 1) 163 | aug_mat1 = torch.reshape(mat1, aug_mat1_shape) 164 | aug_mat2_shape = (nData,) + (1,) * (len(mat1_shape) - 1) + mat2_shape[1:] 165 | aug_mat2 = torch.reshape(mat2, aug_mat2_shape) 166 | return aug_mat1 * aug_mat2 167 | 168 | def add_const_col(mat: torch.Tensor): 169 | assert mat.dim() == 2 170 | n_data = mat.size()[0] 171 | device = mat.device 172 | return torch.cat([mat, torch.ones((n_data, 1), device=device)], dim=1) 173 | 174 | ######### Monitor 175 | class DFLMonitor: 176 | train_data_t: TrainDataSetTorch 177 | test_data_t: TestDataSetTorch 178 | validation_data_t: TrainDataSetTorch 179 | 180 | def __init__(self, t_loss, y_loss, dump_folder, trainer): 181 | 182 | self.t_loss = t_loss 183 | self.y_loss = y_loss 184 | self.metrics = {"stage1_insample_loss": [], 185 | "stage1_outsample_loss": [], 186 | "stage2_insample_loss": [], 187 | "stage2_outsample_loss": [], 188 | "test_loss": []} 189 | 190 | self.dump_folder = dump_folder 191 | self.trainer = trainer 192 | 193 | ##################################################### begin: t_loss = 'bin' 194 | if self.t_loss == 'bin': 195 | self.val_best = 99999 196 | self.pred_ate_train_best = 99999 197 | self.pred_ate_test_best = 99999 198 | 199 | self.pred_ate_train_final = 99999 200 | self.pred_ate_test_final = 99999 201 | ##################################################### 202 | else: 203 | self.train_y_pred = None 204 | self.val_y_pred = None 205 | self.test_y_pred = None 206 | 207 | def configure_data(self, train_data_t: TrainDataSetTorch, 208 | test_data_t: TestDataSetTorch, 209 | validation_data_t: TrainDataSetTorch): 210 | 211 | self.train_data_t = train_data_t 212 | self.test_data_t = test_data_t 213 | self.validation_data_t = validation_data_t 214 | 215 | def record(self, verbose: int): 216 | self.trainer.treatment_net.train(False) 217 | if self.trainer.covariate_net is not None: 218 | self.trainer.covariate_net.train(False) 219 | 220 | n_train_data = self.train_data_t.treatment.size()[0] 221 | n_val_data = self.validation_data_t.treatment.size()[0] 222 | n_test_data = self.test_data_t.treatment.size()[0] 223 | with torch.no_grad(): 224 | treatment_train_feature = self.trainer.treatment_net(self.train_data_t.treatment) 225 | treatment_val_feature = self.trainer.treatment_net(self.validation_data_t.treatment) 226 | treatment_test_feature = self.trainer.treatment_net(self.test_data_t.treatment) 227 | 228 | covariate_train_feature = None 229 | covariate_val_feature = None 230 | covariate_test_feature = None 231 | if self.trainer.covariate_net is not None: 232 | covariate_train_feature = 
self.trainer.covariate_net(self.train_data_t.covariate) 233 | covariate_val_feature = self.trainer.covariate_net(self.validation_data_t.covariate) 234 | covariate_test_feature = self.trainer.covariate_net(self.test_data_t.covariate) 235 | 236 | # stage2 237 | feature = DFIVModel.augment_stage2_feature(treatment_train_feature, 238 | covariate_train_feature, 239 | self.trainer.add_intercept) 240 | 241 | weight = fit_linear(self.train_data_t.outcome, feature, self.trainer.lam) 242 | insample_pred = linear_reg_pred(feature, weight) 243 | if self.y_loss == 'bin': 244 | labels = logit(self.train_data_t.outcome) 245 | logits = logit(insample_pred) 246 | insample_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() / n_train_data 247 | else: 248 | insample_loss = torch.norm(self.train_data_t.outcome - insample_pred) ** 2 / n_train_data 249 | ############################################################################## mse == norm ???? 250 | # insample_loss = torch.norm(self.train_data_t.outcome - insample_pred) ** 2 / n_train_data 251 | ############################################################################## 252 | 253 | val_feature = DFIVModel.augment_stage2_feature(treatment_val_feature, 254 | covariate_val_feature, 255 | self.trainer.add_intercept) 256 | outsample_pred = linear_reg_pred(val_feature, weight) 257 | if self.y_loss == 'bin': 258 | labels = logit(self.validation_data_t.outcome) 259 | logits = logit(outsample_pred) 260 | outsample_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() / n_val_data 261 | else: 262 | outsample_loss = torch.norm(self.validation_data_t.outcome - outsample_pred) ** 2 / n_val_data 263 | 264 | # eval for test 265 | test_feature = DFIVModel.augment_stage2_feature(treatment_test_feature, 266 | covariate_test_feature, 267 | self.trainer.add_intercept) 268 | test_pred = linear_reg_pred(test_feature, weight) 269 | if self.y_loss == 'bin': 270 | labels = logit(self.test_data_t.structural) 271 | logits = logit(test_pred) 272 | test_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() / n_test_data 273 | else: 274 | test_loss = torch.norm(self.test_data_t.structural - test_pred) ** 2 / n_test_data 275 | 276 | if verbose >= 1: 277 | print(f"insample_loss:{insample_loss.item()}") 278 | print(f"outsample_loss:{outsample_loss.item()}") 279 | print(f"test_loss:{test_loss.item()}") 280 | 281 | 282 | ##################################################### begin: t_loss = 'bin' 283 | if self.t_loss == 'bin': 284 | treatment0_train_feature = self.trainer.treatment_net(self.train_data_t.treatment-self.train_data_t.treatment) 285 | treatment0_test_feature = self.trainer.treatment_net(self.test_data_t.treatment-self.test_data_t.treatment) 286 | treatment1_train_feature = self.trainer.treatment_net(self.train_data_t.treatment-self.train_data_t.treatment+1) 287 | treatment1_test_feature = self.trainer.treatment_net(self.test_data_t.treatment-self.test_data_t.treatment+1) 288 | 289 | test_feature1 = DFIVModel.augment_stage2_feature(treatment1_test_feature, 290 | covariate_test_feature, 291 | self.trainer.add_intercept) 292 | test_pred1 = linear_reg_pred(test_feature1, weight) 293 | test_feature0 = DFIVModel.augment_stage2_feature(treatment0_test_feature, 294 | covariate_test_feature, 295 | self.trainer.add_intercept) 296 | test_pred0 = linear_reg_pred(test_feature0, weight) 297 | 298 | train_feature1 = DFIVModel.augment_stage2_feature(treatment1_train_feature, 299 | covariate_train_feature, 300 | 
self.trainer.add_intercept) 301 | train_pred1 = linear_reg_pred(train_feature1, weight) 302 | train_feature0 = DFIVModel.augment_stage2_feature(treatment0_train_feature, 303 | covariate_train_feature, 304 | self.trainer.add_intercept) 305 | train_pred0 = linear_reg_pred(train_feature0, weight) 306 | 307 | if outsample_loss < self.val_best: 308 | print(f"val_best from {self.val_best} to {outsample_loss}.") 309 | self.val_best = outsample_loss 310 | 311 | self.pred_ate_test_best = test_pred1.mean() - test_pred0.mean() 312 | self.pred_ate_train_best = train_pred1.mean() - train_pred0.mean() 313 | 314 | print(f"train_ate_best: {self.pred_ate_train_best.item()}; test_ate_best: {self.pred_ate_test_best.item()}.") 315 | 316 | self.pred_ate_test_final = test_pred1.mean() - test_pred0.mean() 317 | self.pred_ate_train_final = train_pred1.mean() - train_pred0.mean() 318 | 319 | print(f"train_ate_final: {self.pred_ate_train_final.item()}; test_ate_final: {self.pred_ate_test_final.item()}.") 320 | 321 | ##################################################### 322 | else: 323 | self.train_y_pred = [insample_pred, self.train_data_t.outcome] 324 | self.val_y_pred = [outsample_pred, self.validation_data_t.outcome] 325 | self.test_y_pred = [test_pred, self.test_data_t.structural] 326 | 327 | ########## DFLModel 328 | class DFLModel: 329 | weight_mat: torch.Tensor 330 | 331 | def __init__(self, 332 | treatment_net: nn.Module, 333 | covariate_net: Optional[nn.Module], 334 | add_intercept: bool, 335 | device: str 336 | ): 337 | self.treatment_net = treatment_net 338 | self.covariate_net = covariate_net 339 | self.add_intercept = add_intercept 340 | self.device = device 341 | 342 | @staticmethod 343 | def augment_feature(treatment_feature: torch.Tensor, 344 | covariate_feature: Optional[torch.Tensor], 345 | add_intercept: bool): 346 | feature = treatment_feature 347 | if add_intercept: 348 | feature = add_const_col(feature) 349 | 350 | if covariate_feature is not None: 351 | feature_tmp = covariate_feature 352 | if add_intercept: 353 | feature_tmp = add_const_col(feature_tmp) 354 | feature = outer_prod(feature, feature_tmp) 355 | feature = torch.flatten(feature, start_dim=1) 356 | 357 | return feature 358 | 359 | @staticmethod 360 | def fit_dfl(treatment_feature: torch.Tensor, 361 | covariate_feature: Optional[torch.Tensor], 362 | outcome_t: torch.Tensor, 363 | lam: float, add_intercept: bool 364 | ): 365 | 366 | # stage1 367 | feature = DFLModel.augment_feature(treatment_feature, 368 | covariate_feature, 369 | add_intercept) 370 | 371 | weight = fit_linear(outcome_t, feature, lam) 372 | pred = linear_reg_pred(feature, weight) 373 | loss = torch.norm((outcome_t - pred)) ** 2 + lam * torch.norm(weight) ** 2 374 | 375 | labels = logit(outcome_t) 376 | logits = logit(pred) 377 | log_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() + lam * torch.norm(weight) ** 2 378 | 379 | return dict(weight=weight, loss=loss, log_loss=log_loss) 380 | 381 | def fit_t(self, train_data_t: TrainDataSetTorch, lam: float): 382 | treatment_feature = self.treatment_net(train_data_t.treatment) 383 | outcome_t = train_data_t.outcome 384 | covariate_feature = None 385 | if self.covariate_net is not None: 386 | covariate_feature = self.covariate_net(train_data_t.covariate) 387 | 388 | res = DFLModel.fit_dfl(treatment_feature, covariate_feature, outcome_t, lam, self.add_intercept) 389 | self.weight_mat = res["weight"] 390 | 391 | def fit(self, train_data: TrainDataSet, lam: float): 392 | train_data_t = 
TrainDataSetTorch.from_numpy(train_data) 393 | self.fit_t(train_data_t, lam) 394 | 395 | def predict_t(self, treatment: torch.Tensor, covariate: Optional[torch.Tensor]): 396 | treatment_feature = self.treatment_net(treatment) 397 | covariate_feature = None 398 | if self.covariate_net: 399 | covariate_feature = self.covariate_net(covariate) 400 | 401 | feature = DFLModel.augment_feature(treatment_feature, covariate_feature, self.add_intercept) 402 | return linear_reg_pred(feature, self.weight_mat) 403 | 404 | def predict(self, treatment: np.ndarray, covariate: Optional[np.ndarray]): 405 | treatment_t = torch.tensor(treatment, dtype=torch.float32).to(self.device) 406 | covariate_t = None 407 | if covariate is not None: 408 | covariate_t = torch.tensor(covariate, dtype=torch.float32).to(self.device) 409 | return self.predict_t(treatment_t, covariate_t).data.detach().cpu().numpy() 410 | 411 | def evaluate_t(self, y_loss: str, test_data: TestDataSetTorch): 412 | target = test_data.structural 413 | with torch.no_grad(): 414 | pred = self.predict_t(test_data.treatment, test_data.covariate) 415 | if y_loss == 'bin': 416 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 417 | else: 418 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 419 | 420 | def evaluate(self, y_loss: str, test_data: TestDataSet): 421 | return self.evaluate_t(y_loss, TestDataSetTorch.from_numpy(test_data)).data.detach().cpu().item() 422 | 423 | class DFIVModel: 424 | stage1_weight: torch.Tensor 425 | stage2_weight: torch.Tensor 426 | 427 | def __init__(self, 428 | treatment_net: nn.Module, 429 | instrumental_net: nn.Module, 430 | covariate_net: Optional[nn.Module], 431 | add_stage1_intercept: bool, 432 | add_stage2_intercept: bool 433 | ): 434 | self.treatment_net = treatment_net 435 | self.instrumental_net = instrumental_net 436 | self.covariate_net = covariate_net 437 | self.add_stage1_intercept = add_stage1_intercept 438 | self.add_stage2_intercept = add_stage2_intercept 439 | 440 | @staticmethod 441 | def augment_stage1_feature(instrumental_feature: torch.Tensor, 442 | add_stage1_intercept: bool): 443 | 444 | feature = instrumental_feature 445 | if add_stage1_intercept: 446 | feature = add_const_col(feature) 447 | return feature 448 | 449 | @staticmethod 450 | def augment_stage2_feature(predicted_treatment_feature: torch.Tensor, 451 | covariate_feature: Optional[torch.Tensor], 452 | add_stage2_intercept: bool): 453 | feature = predicted_treatment_feature 454 | if add_stage2_intercept: 455 | feature = add_const_col(feature) 456 | 457 | if covariate_feature is not None: 458 | feature_tmp = covariate_feature 459 | if add_stage2_intercept: 460 | feature_tmp = add_const_col(feature_tmp) 461 | feature = outer_prod(feature, feature_tmp) 462 | feature = torch.flatten(feature, start_dim=1) 463 | 464 | return feature 465 | 466 | @staticmethod 467 | def fit_2sls(treatment_1st_feature: torch.Tensor, 468 | instrumental_1st_feature: torch.Tensor, 469 | instrumental_2nd_feature: torch.Tensor, 470 | covariate_2nd_feature: Optional[torch.Tensor], 471 | outcome_2nd_t: torch.Tensor, 472 | lam1: float, lam2: float, 473 | add_stage1_intercept: bool, 474 | add_stage2_intercept: bool, 475 | ): 476 | 477 | # stage1 478 | feature = DFIVModel.augment_stage1_feature(instrumental_1st_feature, add_stage1_intercept) 479 | stage1_weight = fit_linear(treatment_1st_feature, feature, lam1) 480 | 481 | # predicting for stage 2 482 | feature = DFIVModel.augment_stage1_feature(instrumental_2nd_feature, 483 | add_stage1_intercept) 484 
| predicted_treatment_feature = linear_reg_pred(feature, stage1_weight) 485 | 486 | # stage2 487 | feature = DFIVModel.augment_stage2_feature(predicted_treatment_feature, 488 | covariate_2nd_feature, 489 | add_stage2_intercept) 490 | 491 | stage2_weight = fit_linear(outcome_2nd_t, feature, lam2) 492 | pred = linear_reg_pred(feature, stage2_weight) 493 | stage2_loss = torch.norm((outcome_2nd_t - pred)) ** 2 + lam2 * torch.norm(stage2_weight) ** 2 494 | 495 | labels = logit(outcome_2nd_t) 496 | logits = logit(pred) 497 | stage2_log_loss = (-(torch.log(logits) * labels+torch.log(1-logits) * (1-labels))).sum() + lam2 * torch.norm(stage2_weight) ** 2 498 | 499 | return dict(stage1_weight=stage1_weight, 500 | predicted_treatment_feature=predicted_treatment_feature, 501 | stage2_weight=stage2_weight, 502 | stage2_loss=stage2_loss, 503 | stage2_log_loss=stage2_log_loss) 504 | 505 | def fit_t(self, 506 | train_1st_data_t: TrainDataSetTorch, 507 | train_2nd_data_t: TrainDataSetTorch, 508 | lam1: float, lam2: float): 509 | 510 | treatment_1st_feature = self.treatment_net(train_1st_data_t.treatment) 511 | instrumental_1st_feature = self.instrumental_net(train_1st_data_t.instrumental) 512 | instrumental_2nd_feature = self.instrumental_net(train_2nd_data_t.instrumental) 513 | outcome_2nd_t = train_2nd_data_t.outcome 514 | covariate_2nd_feature = None 515 | if self.covariate_net is not None: 516 | covariate_2nd_feature = self.covariate_net(train_2nd_data_t.covariate) 517 | 518 | res = DFIVModel.fit_2sls(treatment_1st_feature, 519 | instrumental_1st_feature, 520 | instrumental_2nd_feature, 521 | covariate_2nd_feature, 522 | outcome_2nd_t, 523 | lam1, lam2, 524 | self.add_stage1_intercept, 525 | self.add_stage2_intercept) 526 | 527 | self.stage1_weight = res["stage1_weight"] 528 | self.stage2_weight = res["stage2_weight"] 529 | 530 | def fit(self, train_1st_data: TrainDataSet, train_2nd_data: TrainDataSet, lam1: float, lam2: float): 531 | train_1st_data_t = TrainDataSetTorch.from_numpy(train_1st_data) 532 | train_2nd_data_t = TrainDataSetTorch.from_numpy(train_2nd_data) 533 | self.fit_t(train_1st_data_t, train_2nd_data_t, lam1, lam2) 534 | 535 | def predict_t(self, treatment: torch.Tensor, covariate: Optional[torch.Tensor]): 536 | treatment_feature = self.treatment_net(treatment) 537 | covariate_feature = None 538 | if self.covariate_net: 539 | covariate_feature = self.covariate_net(covariate) 540 | 541 | feature = DFIVModel.augment_stage2_feature(treatment_feature, 542 | covariate_feature, 543 | self.add_stage2_intercept) 544 | return linear_reg_pred(feature, self.stage2_weight) 545 | 546 | def predict(self, treatment: np.ndarray, covariate: Optional[np.ndarray]): 547 | treatment_t = torch.tensor(treatment, dtype=torch.float32) 548 | covariate_t = None 549 | if covariate is not None: 550 | covariate_t = torch.tensor(covariate, dtype=torch.float32) 551 | return self.predict_t(treatment_t, covariate_t).data.numpy() 552 | 553 | def evaluate_t(self, y_loss: str, test_data: TestDataSetTorch): 554 | target = test_data.structural 555 | with torch.no_grad(): 556 | pred = self.predict_t(test_data.treatment, test_data.covariate) 557 | if y_loss == 'bin': 558 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 559 | else: 560 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 561 | 562 | def evaluate(self, y_loss: str, test_data: TestDataSet): 563 | return self.evaluate_t(y_loss, TestDataSetTorch.from_numpy(test_data)).data.item() 564 | 565 | ########## TrainerATE 566 | class 
DFLTrainer(object): 567 | 568 | def __init__(self, t_loss: str, y_loss: str, data_list: List, net_list: List, train_params: Dict[str, Any], 569 | gpu_flg: bool = False, dump_folder: Optional[Path] = None): 570 | self.t_loss = t_loss 571 | self.y_loss = y_loss 572 | 573 | self.data_list = data_list 574 | 575 | if gpu_flg and torch.cuda.is_available(): 576 | self.device = "cuda:0" 577 | else: 578 | self.device = "cpu" 579 | 580 | # configure training params 581 | self.epochs: int = train_params["epochs"] 582 | self.treatment_weight_decay = train_params["treatment_weight_decay"] 583 | self.covariate_weight_decay = train_params["covariate_weight_decay"] 584 | self.lam: float = train_params["lam"] 585 | self.n_iter_treatment = train_params["n_iter_treatment"] 586 | self.n_iter_covariate = train_params["n_iter_covariate"] 587 | self.add_intercept: bool = train_params["add_intercept"] 588 | 589 | # build networks 590 | networks = net_list 591 | self.treatment_net: nn.Module = networks[0] 592 | self.covariate_net: Optional[nn.Module] = networks[2] 593 | 594 | self.treatment_net.to(self.device) 595 | if self.covariate_net is not None: 596 | self.covariate_net.to(self.device) 597 | 598 | self.treatment_opt = torch.optim.Adam(self.treatment_net.parameters(), 599 | weight_decay=self.treatment_weight_decay) 600 | if self.covariate_net: 601 | self.covariate_opt = torch.optim.Adam(self.covariate_net.parameters(), 602 | weight_decay=self.covariate_weight_decay) 603 | self.monitor = None 604 | if dump_folder is not None: 605 | self.monitor = DFLMonitor(t_loss, y_loss, dump_folder, self) 606 | 607 | def train(self, rand_seed: int = 42, verbose: int = 0, epoch_show: int = 20) -> float: 608 | """ 609 | 610 | Parameters 611 | ---------- 612 | rand_seed: int 613 | random seed 614 | verbose : int 615 | Determine the level of logging 616 | Returns 617 | ------- 618 | oos_result : float 619 | The performance of model evaluated by oos 620 | """ 621 | train_data = self.data_list[0] 622 | test_data = self.data_list[2] 623 | train_data_t = TrainDataSetTorch.from_numpy(train_data) 624 | test_data_t = TestDataSetTorch.from_numpy(test_data) 625 | train_data_t = train_data_t.to(self.device) 626 | test_data_t = test_data_t.to(self.device) 627 | 628 | if self.monitor is not None: 629 | validation_data = self.data_list[1] 630 | validation_data_t = TrainDataSetTorch.from_numpy(validation_data) 631 | validation_data_t = validation_data_t.to(self.device) 632 | self.monitor.configure_data(train_data_t, test_data_t, validation_data_t) 633 | 634 | self.lam *= train_data_t[0].size()[0] 635 | 636 | for t in range(self.epochs): 637 | self.update_treatment(train_data_t, verbose) 638 | if self.covariate_net: 639 | self.update_covariate_net(train_data_t, verbose) 640 | 641 | if t % epoch_show == 0 or t == self.epochs - 1: 642 | if verbose >= 1: 643 | print(f"Epoch {t} ended") 644 | if self.monitor is not None: 645 | self.monitor.record(verbose) 646 | 647 | mdl = DFLModel(self.treatment_net, self.covariate_net, self.add_intercept, self.device) 648 | mdl.fit_t(train_data_t, self.lam) 649 | torch.cuda.empty_cache() 650 | 651 | oos_loss: float = mdl.evaluate_t(self.y_loss, test_data_t).data.item() 652 | if verbose >= 1: 653 | print(f"test_loss:{oos_loss}") 654 | return oos_loss, mdl 655 | 656 | def update_treatment(self, train_data_t, verbose): 657 | 658 | self.treatment_net.train(True) 659 | if self.covariate_net: 660 | self.covariate_net.train(False) 661 | 662 | # have covariate features 663 | covariate_feature = None 664 | if 
self.covariate_net: 665 | covariate_feature = self.covariate_net(train_data_t.covariate).detach() 666 | 667 | for i in range(self.n_iter_treatment): 668 | self.treatment_opt.zero_grad() 669 | treatment_feature = self.treatment_net(train_data_t.treatment) 670 | res = DFLModel.fit_dfl(treatment_feature, covariate_feature, train_data_t.outcome, 671 | self.lam, self.add_intercept) 672 | if self.y_loss == 'bin': 673 | loss = res["log_loss"] 674 | else: 675 | loss = res["loss"] 676 | loss.backward() 677 | if verbose >= 2: 678 | print(f"treatment learning: {loss.item()}") 679 | self.treatment_opt.step() 680 | 681 | def update_covariate_net(self, train_data_t: TrainDataSetTorch, verbose: int): 682 | self.treatment_net.train(False) 683 | treatment_feature = self.treatment_net(train_data_t.treatment).detach() 684 | self.covariate_net.train(True) 685 | for i in range(self.n_iter_covariate): 686 | self.covariate_opt.zero_grad() 687 | covariate_feature = self.covariate_net(train_data_t.covariate) 688 | res = DFLModel.fit_dfl(treatment_feature, covariate_feature, train_data_t.outcome, 689 | self.lam, self.add_intercept) 690 | if self.y_loss == 'bin': 691 | loss = res["log_loss"] 692 | else: 693 | loss = res["loss"] 694 | loss.backward() 695 | if verbose >= 2: 696 | print(f"update covariate: {loss.item()}") 697 | self.covariate_opt.step() 698 | 699 | 700 | def build_net(t_input_dim, z_input_dim, x_input_dim): 701 | treatment_net = nn.Sequential(nn.Linear(t_input_dim, 16), 702 | nn.ReLU(), 703 | nn.Linear(16, 1)) 704 | 705 | instrumental_net = nn.Sequential(nn.Linear(z_input_dim, 128), 706 | nn.ReLU(), 707 | nn.Linear(128, 64), 708 | nn.ReLU(), 709 | nn.Linear(64, 32), 710 | nn.BatchNorm1d(32)) 711 | 712 | covariate_net = nn.Sequential(nn.Linear(x_input_dim, 128), 713 | nn.ReLU(), 714 | nn.Linear(128, 32), 715 | nn.BatchNorm1d(32), 716 | nn.ReLU(), 717 | nn.Linear(32, 16), 718 | nn.ReLU()) 719 | 720 | return treatment_net, instrumental_net, covariate_net 721 | 722 | class DFL(object): 723 | def __init__(self) -> None: 724 | self.config = { 725 | 'methodName': 'DFL', 726 | 't_loss': 'cont', 727 | 'y_loss': 'cont', 728 | "epochs": 100, 729 | "lam": 0.1, 730 | 'n_iter_treatment': 20, 731 | 'n_iter_covariate': 20, 732 | 'treatment_weight_decay': 0.0, 733 | 'covariate_weight_decay': 0.1, 734 | "add_intercept": True, 735 | 'epoch_show': 10, 736 | 'verbose': 0, 737 | 'use_gpu': True, 738 | 'seed': 2022, 739 | } 740 | 741 | def set_Configuration(self, config): 742 | self.config = config 743 | 744 | def fit(self, data, exp=-1, config=None): 745 | if config is None: 746 | config = self.config 747 | 748 | train_config = {"epochs": config["epochs"], 749 | "lam": config["lam"], 750 | 'n_iter_treatment': config['n_iter_treatment'], 751 | 'n_iter_covariate': config['n_iter_covariate'], 752 | 'treatment_weight_decay': config['treatment_weight_decay'], 753 | 'covariate_weight_decay': config['covariate_weight_decay'], 754 | "add_intercept": config["add_intercept"], 755 | } 756 | 757 | set_seed(config['seed']) 758 | data.numpy() 759 | 760 | self.z_dim = data.train.z.shape[1] 761 | self.x_dim = data.train.x.shape[1] 762 | self.t_dim = data.train.t.shape[1] 763 | 764 | t_input_dim = self.t_dim 765 | z_input_dim = self.z_dim + self.x_dim 766 | x_input_dim = self.x_dim 767 | 768 | train_z = np.concatenate((data.train.z,data.train.x),1) 769 | train_x = data.train.x 770 | val_z = np.concatenate((data.valid.z,data.valid.x),1) 771 | val_x = data.valid.x 772 | test_z = np.concatenate((data.test.z,data.test.x),1) 773 | test_x = 
data.test.x
774 | 
775 |         if config['t_loss'] == 'bin':
776 |             train_t = data.train.t
777 |             train_t[train_t==0] = -6.9068 # ln(1/999), y = 0.001
778 |             train_t[train_t==1] = 6.9068  # ln(999), y = 0.999
779 |             val_t = data.valid.t
780 |             val_t[val_t==0] = -6.9068 # ln(1/999), y = 0.001
781 |             val_t[val_t==1] = 6.9068  # ln(999), y = 0.999
782 |             test_t = data.test.t
783 |             test_t[test_t==0] = -6.9068 # ln(1/999), y = 0.001
784 |             test_t[test_t==1] = 6.9068  # ln(999), y = 0.999
785 |         else:
786 |             train_t = data.train.t
787 |             val_t = data.valid.t
788 |             test_t = data.test.t
789 | 
790 |         if config['y_loss'] == 'bin':
791 |             train_y = data.train.y
792 |             train_y[train_y==0] = -6.9068 # ln(1/999), y = 0.001
793 |             train_y[train_y==1] = 6.9068  # ln(999), y = 0.999
794 |             val_y = data.valid.y
795 |             val_y[val_y==0] = -6.9068 # ln(1/999), y = 0.001
796 |             val_y[val_y==1] = 6.9068  # ln(999), y = 0.999
797 |             test_y = data.test.y
798 |             test_y[test_y==0] = -6.9068 # ln(1/999), y = 0.001
799 |             test_y[test_y==1] = 6.9068  # ln(999), y = 0.999
800 |         else:
801 |             train_y = data.train.y
802 |             val_y = data.valid.y
803 |             test_y = data.test.y
804 | 
805 |         train_data = TrainDataSet(treatment=train_t,
806 |                                   instrumental=train_z,
807 |                                   covariate=train_x,
808 |                                   outcome=train_y,
809 |                                   structural=train_y)
810 |         val_data = TrainDataSet(treatment=val_t,
811 |                                 instrumental=val_z,
812 |                                 covariate=val_x,
813 |                                 outcome=val_y,
814 |                                 structural=val_y)
815 |         test_data = TestDataSet(treatment=test_t,
816 |                                 instrumental=test_z,
817 |                                 covariate=test_x,
818 |                                 structural=test_y,
819 |                                 outcome=test_y)
820 |         data_list = [train_data, val_data, test_data]
821 | 
822 |         treatment_net, instrumental_net, covariate_net = build_net(t_input_dim, z_input_dim, x_input_dim)
823 |         net_list = [treatment_net, None, covariate_net]
824 | 
825 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
826 | 
827 |         trainer = DFLTrainer(config['t_loss'], config['y_loss'], data_list, net_list, train_config, config['use_gpu'], './tmp/')
828 |         test_loss, mdl = trainer.train(rand_seed=config['seed'], verbose=config['verbose'], epoch_show=config['epoch_show'])
829 | 
830 |         def estimation(data):
831 |             return mdl.predict_t(data.t-data.t, data.x), mdl.predict_t(data.t, data.x)
832 | 
833 |         print('End. 
' + '-'*20) 834 | 835 | self.mdl = mdl 836 | self.estimation = estimation 837 | 838 | def predict(self, data=None, t=None, x=None): 839 | if data is None: 840 | data = self.data.test 841 | 842 | if x is None: 843 | x = data.x 844 | 845 | if t is None: 846 | t = data.t 847 | 848 | with torch.no_grad(): 849 | pred = self.mdl.predict(t, x) 850 | 851 | return pred 852 | 853 | def ITE(self, data=None, t=None, x=None): 854 | if data is None: 855 | data = self.data.test 856 | 857 | if x is None: 858 | x = data.x 859 | 860 | if t is None: 861 | t = data.t 862 | 863 | ITE_0 = self.mdl.predict(t-t,x) 864 | ITE_1 = self.mdl.predict(t-t+1,x) 865 | ITE_t = self.mdl.predict(t,x) 866 | 867 | return ITE_0,ITE_1,ITE_t 868 | 869 | def ATE(self, data=None, t=None, x=None): 870 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 871 | 872 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 873 | 874 | -------------------------------------------------------------------------------- /mliv/inference/gmm/__init__.py: -------------------------------------------------------------------------------- 1 | from .agmm_v1 import AGMM 2 | from .deepgmm_v1 import DeepGMM -------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainer import AGMM 2 | 3 | example = ''' 4 | from mliv.inference import AGMM 5 | 6 | model = AGMM() 7 | model.fit(data) 8 | ITE = model.predict(data.train) 9 | ATE,_ = model.ATE(data.train) 10 | ''' -------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tempfile 4 | import torch 5 | import torch.nn as nn 6 | from torch.utils.data import DataLoader, TensorDataset 7 | from .oadam import OAdam 8 | from .rbflayer import RBF 9 | 10 | # TODO. This epsilon is used only because pytorch 1.5 has an instability in torch.cdist 11 | # when the input distance is close to zero, due to instability of the square root in 12 | # automatic differentiation. Should be removed once pytorch fixes the instability. 13 | # It can be set to 0 if using pytorch 1.4.0 14 | EPSILON = 1e-2 15 | 16 | 17 | def add_weight_decay(net, l2_value, skip_list=()): 18 | decay, no_decay = [], [] 19 | for name, param in net.named_parameters(): 20 | if not param.requires_grad: 21 | continue # frozen weights 22 | if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list: 23 | no_decay.append(param) 24 | else: 25 | decay.append(param) 26 | return [{'params': no_decay, 'weight_decay': 0.}, {'params': decay, 'weight_decay': l2_value}] 27 | 28 | 29 | def _kernel(x, y, basis_func, sigma): 30 | return basis_func(torch.cdist(x, y + EPSILON) * torch.abs(sigma)) 31 | 32 | 33 | class _BaseAGMM: 34 | 35 | def _pretrain(self, Z, T, Y, 36 | learner_l2, adversary_l2, adversary_norm_reg, 37 | learner_lr, adversary_lr, n_epochs, bs, train_learner_every, train_adversary_every, 38 | warm_start, results_dir, device, verbose, add_sample_inds=False): 39 | """ Prepares the variables required to begin training. 
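        Specifically: creates the checkpoint directory ('agmm_model') and the
        tensorboard directory under results_dir, wraps (Z, T, Y) in a shuffled
        DataLoader of batch size bs, moves learner and adversary to device,
        re-initializes their parameters when warm_start is False, and builds
        the two OAdam optimizers used by the min-max training loop.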
40 |         """
41 |         self.verbose = verbose
42 | 
43 |         model_dir = results_dir+'agmm_model'
44 |         if not os.path.exists(model_dir):
45 |             os.makedirs(model_dir)
46 |         self.tempdir = tempfile.TemporaryDirectory(dir=model_dir)
47 |         self.model_dir = self.tempdir.name
48 | 
49 |         tfboard_dir = results_dir+'agmm_tfboard'
50 |         os.makedirs(os.path.dirname(tfboard_dir), exist_ok=True)
51 |         self.tfboard_dir = tfboard_dir
52 | 
53 |         self.n_epochs = n_epochs
54 | 
55 |         if add_sample_inds:
56 |             sample_inds = torch.tensor(np.arange(Y.shape[0]))
57 |             self.train_ds = TensorDataset(Z, T, Y, sample_inds)
58 |         else:
59 |             self.train_ds = TensorDataset(Z, T, Y)
60 |         self.train_dl = DataLoader(self.train_ds, batch_size=bs, shuffle=True)
61 | 
62 |         self.learner = self.learner.to(device)
63 |         self.adversary = self.adversary.to(device)
64 | 
65 |         if not warm_start:
66 |             self.learner.apply(lambda m: (
67 |                 m.reset_parameters() if hasattr(m, 'reset_parameters') else None))
68 |             self.adversary.apply(lambda m: (
69 |                 m.reset_parameters() if hasattr(m, 'reset_parameters') else None))
70 | 
71 |         beta1 = 0.
72 |         self.optimizerD = OAdam(add_weight_decay(self.learner, learner_l2),
73 |                                 lr=learner_lr, betas=(beta1, .01))
74 |         self.optimizerG = OAdam(add_weight_decay(
75 |             self.adversary, adversary_l2, skip_list=self.skip_list), lr=adversary_lr, betas=(beta1, .01))
76 | 
77 |         return Z, T, Y
78 | 
79 |     def predict(self, T, model='avg', burn_in=0, alpha=None):
80 |         """
81 |         Parameters
82 |         ----------
83 |         T : treatments
84 |         model : one of ('avg', 'final') or an int epoch index; use the average of the saved models, the final model, or the model saved at that epoch
85 |         burn_in : discard the first "burn_in" epochs when doing averaging
86 |         alpha : if not None but a float, then also return the alpha/2 and 1-alpha/2 percentiles of
87 |             the predictions across different epochs (proxy for a confidence interval)
88 |         """
89 |         if model == 'avg':
90 |             preds = np.array([torch.load(os.path.join(self.model_dir,"epoch{}".format(i)))(T).cpu().data.numpy()
91 |                               for i in np.arange(burn_in, self.n_epochs)])
92 |             if alpha is None:
93 |                 return np.mean(preds, axis=0)
94 |             else:
95 |                 return np.mean(preds, axis=0), np.percentile(preds, 100 * alpha / 2, axis=0), np.percentile(preds, 100 * (1 - alpha / 2), axis=0)
96 |         if model == 'final':
97 |             return torch.load(os.path.join(self.model_dir,"epoch{}".format(self.n_epochs - 1)))(T).cpu().data.numpy()
98 |         if isinstance(model, int):
99 |             return torch.load(os.path.join(self.model_dir,"epoch{}".format(model)))(T).cpu().data.numpy()
100 | 
101 | 
102 | class _BaseSupLossAGMM(_BaseAGMM):
103 | 
104 |     def fit(self, Z, T, Y, Z_val, T_val, Y_val, T_test_tens, G_val,
105 |             learner_l2=1e-3, adversary_l2=1e-4, adversary_norm_reg=1e-3,
106 |             learner_lr=0.001, adversary_lr=0.001, n_epochs=100, bs=100, train_learner_every=1, train_adversary_every=1,
107 |             ols_weight=0., warm_start=False, results_dir='.', device=None, verbose=0):
108 |         """
109 |         Parameters
110 |         ----------
111 |         Z : instruments
112 |         T : treatments
113 |         Y : outcome
114 |         learner_l2, adversary_l2 : l2 regularization of parameters of learner and adversary
115 |         adversary_norm_reg : adversary norm regularization weight
116 |         learner_lr : learning rate of the Adam optimizer for learner
117 |         adversary_lr : learning rate of the Adam optimizer for adversary
118 |         n_epochs : how many passes over the data
119 |         bs : batch size
120 |         train_learner_every, train_adversary_every : train the learner (resp. the adversary) once every this many minibatch iterations of the other player
121 |         ols_weight : weight on OLS (square loss) objective
122 |         warm_start : if False then network parameters 
are initialized at the beginning, otherwise we start 123 | from their current weights 124 | results_dir : folder where to store the learned models after every epoch 125 | """ 126 | 127 | Z, T, Y = self._pretrain(Z, T, Y, 128 | learner_l2, adversary_l2, adversary_norm_reg, 129 | learner_lr, adversary_lr, n_epochs, bs, train_learner_every, train_adversary_every, 130 | warm_start, results_dir, device, verbose) 131 | 132 | for epoch in range(n_epochs): 133 | 134 | if self.verbose > 0: 135 | print("Epoch #", epoch, sep="") 136 | 137 | for it, (zb, xb, yb) in enumerate(self.train_dl): 138 | 139 | zb, xb, yb = map(lambda x: x.to(device), (zb, xb, yb)) 140 | 141 | if (it % train_learner_every == 0): 142 | self.learner.train() 143 | pred = self.learner(xb) 144 | test = self.adversary(zb) 145 | D_loss = torch.mean((yb - pred) * test) + ols_weight * torch.mean((yb - pred)**2) 146 | self.optimizerD.zero_grad() 147 | D_loss.backward() 148 | self.optimizerD.step() 149 | self.learner.eval() 150 | 151 | if (it % train_adversary_every == 0): 152 | self.adversary.train() 153 | pred = self.learner(xb) 154 | reg = 0 155 | if self.adversary_reg: 156 | test, reg = self.adversary(zb, reg=True) 157 | else: 158 | test = self.adversary(zb) 159 | G_loss = - torch.mean((yb - pred) * test) + torch.mean(test**2) 160 | G_loss += adversary_norm_reg * reg 161 | self.optimizerG.zero_grad() 162 | G_loss.backward() 163 | self.optimizerG.step() 164 | self.adversary.eval() 165 | 166 | torch.save(self.learner, os.path.join(self.model_dir, "epoch{}".format(epoch))) 167 | 168 | return self 169 | 170 | 171 | class AGMM_Net(_BaseSupLossAGMM): 172 | 173 | def __init__(self, learner, adversary): 174 | """ 175 | Parameters 176 | ---------- 177 | learner : a pytorch neural net module 178 | adversary : a pytorch neural net module 179 | """ 180 | self.learner = learner 181 | self.adversary = adversary 182 | # whether we have a norm penalty for the adversary 183 | self.adversary_reg = False 184 | # which adversary parameters to not ell2 penalize 185 | self.skip_list = [] 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/oadam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | Forked from the repository: 6 | https://github.com/georgepar/optimistic-adam 7 | By George Paraskevopoulos on April 15, 2020 8 | """ 9 | import math 10 | import torch 11 | from torch.optim import Optimizer 12 | 13 | 14 | class OAdam(Optimizer): 15 | """Implements optimistic Adam algorithm. 16 | 17 | It has been proposed in `Training GANs with Optimism`_. 18 | 19 | Arguments: 20 | params (iterable): iterable of parameters to optimize or dicts defining 21 | parameter groups 22 | lr (float, optional): learning rate (default: 1e-3) 23 | betas (Tuple[float, float], optional): coefficients used for computing 24 | running averages of gradient and its square (default: (0.9, 0.999)) 25 | eps (float, optional): term added to the denominator to improve 26 | numerical stability (default: 1e-8) 27 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 28 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this 29 | algorithm from the paper `On the Convergence of Adam and Beyond`_ 30 | (default: False) 31 | 32 | .. 
_Training GANs with Optimism:
33 |         https://arxiv.org/abs/1711.00141
34 |     """
35 | 
36 |     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
37 |                  weight_decay=0, amsgrad=False):
38 |         if not 0.0 <= lr:
39 |             raise ValueError("Invalid learning rate: {}".format(lr))
40 |         if not 0.0 <= eps:
41 |             raise ValueError("Invalid epsilon value: {}".format(eps))
42 |         if not 0.0 <= betas[0] < 1.0:
43 |             raise ValueError(
44 |                 "Invalid beta parameter at index 0: {}".format(betas[0]))
45 |         if not 0.0 <= betas[1] < 1.0:
46 |             raise ValueError(
47 |                 "Invalid beta parameter at index 1: {}".format(betas[1]))
48 |         defaults = dict(lr=lr, betas=betas, eps=eps,
49 |                         weight_decay=weight_decay, amsgrad=amsgrad)
50 |         super(OAdam, self).__init__(params, defaults)
51 | 
52 |     def __setstate__(self, state):
53 |         super(OAdam, self).__setstate__(state)
54 |         for group in self.param_groups:
55 |             group.setdefault('amsgrad', False)
56 | 
57 |     def step(self, closure=None):
58 |         """Performs a single optimization step.
59 | 
60 |         Arguments:
61 |             closure (callable, optional): A closure that reevaluates the model
62 |                 and returns the loss.
63 |         """
64 |         loss = None
65 |         if closure is not None:
66 |             loss = closure()
67 | 
68 |         for group in self.param_groups:
69 |             for p in group['params']:
70 |                 if p.grad is None:
71 |                     continue
72 |                 grad = p.grad.data
73 |                 if grad.is_sparse:
74 |                     raise RuntimeError(
75 |                         'Adam does not support sparse gradients, please consider SparseAdam instead')
76 |                 amsgrad = group['amsgrad']
77 | 
78 |                 state = self.state[p]
79 | 
80 |                 # State initialization
81 |                 if len(state) == 0:
82 |                     state['step'] = 0
83 |                     # Exponential moving average of gradient values
84 |                     state['exp_avg'] = torch.zeros_like(p.data)
85 |                     # Exponential moving average of squared gradient values
86 |                     state['exp_avg_sq'] = torch.zeros_like(p.data)
87 |                     if amsgrad:
88 |                         # Maintains max of all exp. moving avg. of sq. grad. values
89 |                         state['max_exp_avg_sq'] = torch.zeros_like(p.data)
90 | 
91 |                 exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
92 |                 if amsgrad:
93 |                     max_exp_avg_sq = state['max_exp_avg_sq']
94 |                 beta1, beta2 = group['betas']
95 | 
96 |                 state['step'] += 1
97 | 
98 |                 if group['weight_decay'] != 0:
99 |                     grad.add_(group['weight_decay'], p.data)
100 | 
101 |                 bias_correction1 = 1 - beta1 ** state['step']
102 |                 bias_correction2 = 1 - beta2 ** state['step']
103 |                 step_size = group['lr'] * \
104 |                     math.sqrt(bias_correction2) / bias_correction1
105 | 
106 |                 # Optimistic update: first re-apply the previous step using the stale moments
107 |                 p.data.addcdiv_(step_size, exp_avg,
108 |                                 exp_avg_sq.sqrt().add(group['eps']))
109 | 
110 |                 # Decay the first and second moment running average coefficient
111 |                 exp_avg.mul_(beta1).add_(1 - beta1, grad)
112 |                 exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
113 |                 if amsgrad:
114 |                     # Maintains the maximum of all 2nd moment running avg. till now
115 |                     torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
116 |                     # Use the max. for normalizing running avg. of gradient
117 |                     denom = max_exp_avg_sq.sqrt().add_(group['eps'])
118 |                 else:
119 |                     denom = exp_avg_sq.sqrt().add_(group['eps'])
120 | 
121 |                 p.data.addcdiv_(-2.0 * step_size, exp_avg, denom)
122 | 
123 |         return loss
124 | 
-------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/rbflayer.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT License. 
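
# A minimal usage sketch of the layer defined below, assuming a 3-dimensional
# input and the gaussian basis function from this file; the layer computes
# u_i = basis_func(||x - c_i|| * |s_i|) for each of the out_features centres:
#
#   layer = RBF(in_features=3, out_features=8, basis_func=gaussian)
#   out = layer(torch.randn(16, 3))   # -> shape (16, 8)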
3 | 4 | """ 5 | Forked from the repository: 6 | https://github.com/JeremyLinux/PyTorch-Radial-Basis-Function-Layer 7 | By Hamish Flynn on April 15, 2020 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | 12 | # RBF Layer 13 | 14 | 15 | class RBF(nn.Module): 16 | """ 17 | Transforms incoming data using a given radial basis function: 18 | u_{i} = rbf(||x - c_{i}|| / s_{i}) 19 | 20 | Arguments: 21 | in_features: size of each input sample 22 | out_features: size of each output sample 23 | 24 | Shape: 25 | - Input: (N, in_features) where N is an arbitrary batch size 26 | - Output: (N, out_features) where N is an arbitrary batch size 27 | 28 | Attributes: 29 | centres: the learnable centres of shape (out_features, in_features). 30 | The values are initialised from a standard normal distribution. 31 | Normalising inputs to have mean 0 and standard deviation 1 is 32 | recommended. 33 | 34 | sigmas: the learnable scaling factors of shape (out_features). 35 | The values are initialised as ones. 36 | 37 | basis_func: the radial basis function used to transform the scaled 38 | distances. 39 | """ 40 | 41 | def __init__(self, in_features, out_features, basis_func, centres=None, sigmas=None, 42 | trainable=True): 43 | super(RBF, self).__init__() 44 | self.in_features = in_features 45 | self.out_features = out_features 46 | self.centres = nn.Parameter( 47 | torch.Tensor(out_features, in_features)) 48 | self.centres.requires_grad = trainable 49 | self.sigmas = nn.Parameter(torch.Tensor(1, out_features)) 50 | self.sigmas.requires_grad = trainable 51 | self.basis_func = basis_func 52 | self.pd = nn.PairwiseDistance() 53 | self.init_centres = centres 54 | self.init_sigmas = sigmas 55 | self.reset_parameters() 56 | 57 | def reset_parameters(self): 58 | if self.init_centres is not None: 59 | self.centres.data = torch.Tensor( 60 | self.init_centres).to(self.centres.device) 61 | else: 62 | nn.init.normal_(self.centres, 0, 1) 63 | if self.init_sigmas is not None: 64 | self.sigmas.data = torch.Tensor( 65 | self.init_sigmas).to(self.sigmas.device).T 66 | else: 67 | nn.init.constant_(self.sigmas, 1) 68 | 69 | def forward(self, input): 70 | distances = torch.cdist(input, self.centres) * torch.abs(self.sigmas) 71 | return self.basis_func(distances) 72 | 73 | 74 | # RBFs 75 | 76 | def gaussian(alpha): 77 | phi = torch.exp(-1 * alpha.pow(2)) 78 | return phi 79 | 80 | 81 | def linear(alpha): 82 | phi = alpha 83 | return phi 84 | 85 | 86 | def quadratic(alpha): 87 | phi = alpha.pow(2) 88 | return phi 89 | 90 | 91 | def inverse_quadratic(alpha): 92 | phi = torch.ones_like(alpha) / (torch.ones_like(alpha) + alpha.pow(2)) 93 | return phi 94 | 95 | 96 | def multiquadric(alpha): 97 | phi = (torch.ones_like(alpha) + alpha.pow(2)).pow(0.5) 98 | return phi 99 | 100 | 101 | def inverse_multiquadric(alpha): 102 | phi = torch.ones_like( 103 | alpha) / (torch.ones_like(alpha) + alpha.pow(2)).pow(0.5) 104 | return phi 105 | 106 | 107 | def spline(alpha): 108 | phi = (alpha.pow(2) * torch.log(alpha + torch.ones_like(alpha))) 109 | return phi 110 | 111 | 112 | def poisson_one(alpha): 113 | phi = (alpha - torch.ones_like(alpha)) * torch.exp(-alpha) 114 | return phi 115 | 116 | 117 | def poisson_two(alpha): 118 | phi = ((alpha - 2 * torch.ones_like(alpha)) / 2 * torch.ones_like(alpha)) \ 119 | * alpha * torch.exp(-alpha) 120 | return phi 121 | 122 | 123 | def matern32(alpha): 124 | phi = (torch.ones_like(alpha) + 3**0.5 * alpha) * \ 125 | torch.exp(-3**0.5 * alpha) 126 | return phi 127 | 128 | 129 | def matern52(alpha): 130 | phi = 
(torch.ones_like(alpha) + 5**0.5 * alpha + (5 / 3) 131 | * alpha.pow(2)) * torch.exp(-5**0.5 * alpha) 132 | return phi 133 | 134 | 135 | def basis_func_dict(): 136 | """ 137 | A helper function that returns a dictionary containing each RBF 138 | """ 139 | 140 | bases = {'gaussian': gaussian, 141 | 'linear': linear, 142 | 'quadratic': quadratic, 143 | 'inverse quadratic': inverse_quadratic, 144 | 'multiquadric': multiquadric, 145 | 'inverse multiquadric': inverse_multiquadric, 146 | 'spline': spline, 147 | 'poisson one': poisson_one, 148 | 'poisson two': poisson_two, 149 | 'matern32': matern32, 150 | 'matern52': matern52} 151 | return bases 152 | -------------------------------------------------------------------------------- /mliv/inference/gmm/agmm_v1/trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import numpy as np 4 | from .net import AGMM_Net 5 | from mliv.utils import set_seed, cat 6 | 7 | example = ''' 8 | from mliv.inference import AGMM 9 | 10 | model = AGMM() 11 | model.fit(data) 12 | ITE = model.predict(data.train) 13 | ATE,_ = model.ATE(data.train) 14 | ''' 15 | 16 | class AGMM(object): 17 | def __init__(self) -> None: 18 | self.config = { 19 | 'methodName': 'AGMM', 20 | 'dropout': 0.1, 21 | 'n_hidden': 100, 22 | 'g_features': 100, 23 | 'learner_lr': 1e-4, 24 | 'adversary_lr': 1e-4, 25 | 'learner_l2': 1e-3, 26 | 'adversary_l2': 1e-4, 27 | 'adversary_norm_reg': 1e-3, 28 | 'epochs': 100, 29 | 'batch_size': 100, 30 | 'sigma': 2.0, 31 | 'n_centers': 100, 32 | 'device': 'cuda:0', 33 | 'mode': 'final', 34 | 'resultDir': './Results/tmp/', 35 | 'seed': 2022, 36 | } 37 | 38 | def set_Configuration(self, config): 39 | self.config = config 40 | 41 | def fit(self, data, exp=-1, config=None): 42 | if config is None: 43 | config = self.config 44 | 45 | device = config['device'] 46 | p = config['dropout'] 47 | n_hidden = config['n_hidden'] 48 | g_features = config['g_features'] 49 | learner_lr = config['learner_lr'] 50 | adversary_lr = config['adversary_lr'] 51 | learner_l2 = config['learner_l2'] 52 | adversary_l2 = config['adversary_l2'] 53 | adversary_norm_reg = config['adversary_norm_reg'] 54 | epochs = config['epochs'] 55 | bs = config['batch_size'] 56 | sigma = config['sigma'] / g_features 57 | n_centers = config['n_centers'] 58 | resultDir = config['resultDir'] 59 | self.mode = config['mode'] 60 | 61 | set_seed(config['seed']) 62 | data.numpy() 63 | 64 | self.z_dim = data.train.z.shape[1] 65 | self.x_dim = data.train.x.shape[1] 66 | self.t_dim = data.train.t.shape[1] 67 | 68 | learner = nn.Sequential(nn.Dropout(p=p), nn.Linear(self.t_dim+self.x_dim, n_hidden), nn.LeakyReLU(), 69 | nn.Dropout(p=p), nn.Linear(n_hidden, n_hidden), nn.ReLU(), 70 | nn.Dropout(p=p), nn.Linear(n_hidden, 1)) 71 | 72 | adversary_fn = nn.Sequential(nn.Dropout(p=p), nn.Linear(self.z_dim+self.x_dim, n_hidden), nn.LeakyReLU(), 73 | nn.Dropout(p=p), nn.Linear(n_hidden, n_hidden), nn.ReLU(), 74 | nn.Dropout(p=p), nn.Linear(n_hidden, 1)) 75 | 76 | 77 | Z_train, T_train, Y_train, G_train = map(lambda x: torch.Tensor(x).to(device), (np.concatenate([data.train.z, data.train.x],1), np.concatenate([data.train.t, data.train.x],1), data.train.y, data.train.g)) 78 | Z_val, T_val, Y_val, G_val = map(lambda x: torch.Tensor(x).to(device), (np.concatenate([data.valid.z, data.valid.x],1), np.concatenate([data.valid.t, data.valid.x],1), data.valid.y, data.valid.g)) 79 | T_test_tens = torch.Tensor(np.concatenate([data.test.t, 
data.test.x],1)).to(device) 80 | G_test_tens = torch.Tensor(data.test.g).to(device) 81 | 82 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 83 | 84 | agmm = AGMM_Net(learner, adversary_fn).fit(Z_train, T_train, Y_train, Z_val, T_val, Y_val, T_test_tens, G_val, 85 | learner_lr=learner_lr, adversary_lr=adversary_lr, 86 | learner_l2=learner_l2, adversary_l2=adversary_l2, 87 | n_epochs=epochs, bs=bs, 88 | results_dir=resultDir, device=device, verbose=0) 89 | 90 | print('End. ' + '-'*20) 91 | 92 | def estimation(data): 93 | input0 = torch.Tensor(np.concatenate([data.t-data.t, data.x],1)).to(device) 94 | point0 = agmm.predict(input0, model=self.mode) 95 | 96 | inputt = torch.Tensor(np.concatenate([data.t, data.x],1)).to(device) 97 | pointt = agmm.predict(inputt, model=self.mode) 98 | 99 | return point0, pointt 100 | 101 | self.estimation = estimation 102 | self.device = device 103 | self.agmm = agmm 104 | 105 | def predict(self, data=None, t=None, x=None): 106 | if data is None: 107 | data = self.data.test 108 | 109 | if x is None: 110 | x = data.x 111 | 112 | if t is None: 113 | t = data.t 114 | 115 | pred = self.agmm.predict(torch.Tensor(np.concatenate([t, x],1)).to(self.device), model=self.mode) 116 | 117 | return pred 118 | 119 | def ITE(self, data=None, t=None, x=None): 120 | if data is None: 121 | data = self.data.test 122 | 123 | if x is None: 124 | x = data.x 125 | 126 | if t is None: 127 | t = data.t 128 | 129 | ITE_0 = self.agmm.predict(torch.Tensor(np.concatenate([t-t, x],1)).to(self.device), model=self.mode) 130 | ITE_1 = self.agmm.predict(torch.Tensor(np.concatenate([t-t+1, x],1)).to(self.device), model=self.mode) 131 | ITE_t = self.agmm.predict(torch.Tensor(np.concatenate([t, x],1)).to(self.device), model=self.mode) 132 | 133 | return ITE_0,ITE_1,ITE_t 134 | 135 | def ATE(self, data=None, t=None, x=None): 136 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 137 | 138 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 139 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainer import DeepGMM 2 | 3 | example = ''' 4 | from mliv.inference import DeepGMM 5 | 6 | model = DeepGMM() 7 | model.fit(data) 8 | ITE = model.predict(data.train) 9 | ATE,_ = model.ATE(data.train) 10 | ''' -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/dataclass.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple, Optional 2 | import numpy as np 3 | import torch 4 | 5 | def load_TrainDataSet(data): 6 | try: 7 | x = data.x 8 | except: 9 | x = None 10 | 11 | train_data = TrainDataSet(treatment=data.t, 12 | instrumental=data.z, 13 | covariate=x, 14 | outcome=data.y, 15 | structural=data.g) 16 | 17 | return train_data 18 | 19 | def load_TestDataSet(data): 20 | try: 21 | z = data.z 22 | except: 23 | z = None 24 | 25 | try: 26 | x = data.x 27 | except: 28 | x = None 29 | 30 | try: 31 | y = data.y 32 | except: 33 | y = None 34 | 35 | test_data = TestDataSet(treatment=data.t, 36 | instrumental=z, 37 | covariate=x, 38 | outcome=y, 39 | structural=data.g) 40 | 41 | return test_data 42 | 43 | class TrainDataSet(NamedTuple): 44 | treatment: np.ndarray 45 | instrumental: np.ndarray 46 | covariate: Optional[np.ndarray] 47 | outcome: np.ndarray 48 | structural: np.ndarray 49 | 50 | class TestDataSet(NamedTuple): 
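    """Test-split container; covariate, instrumental and outcome are optional and may be None."""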
51 | treatment: np.ndarray 52 | covariate: Optional[np.ndarray] 53 | structural: np.ndarray 54 | instrumental: Optional[np.ndarray] 55 | outcome: Optional[np.ndarray] 56 | 57 | class TrainDataSetTorch(NamedTuple): 58 | treatment: torch.Tensor 59 | instrumental: torch.Tensor 60 | covariate: torch.Tensor 61 | outcome: torch.Tensor 62 | structural: torch.Tensor 63 | 64 | @classmethod 65 | def from_numpy(cls, train_data: TrainDataSet): 66 | covariate = None 67 | if train_data.covariate is not None: 68 | covariate = torch.tensor(train_data.covariate, dtype=torch.float32) 69 | return TrainDataSetTorch(treatment=torch.tensor(train_data.treatment, dtype=torch.float32), 70 | instrumental=torch.tensor(train_data.instrumental, dtype=torch.float32), 71 | covariate=covariate, 72 | outcome=torch.tensor(train_data.outcome, dtype=torch.float32), 73 | structural=torch.tensor(train_data.structural, dtype=torch.float32)) 74 | 75 | def to(self, device): 76 | covariate = None 77 | if self.covariate is not None: 78 | covariate = self.covariate.to(device) 79 | return TrainDataSetTorch(treatment=self.treatment.to(device), 80 | instrumental=self.instrumental.to(device), 81 | covariate=covariate, 82 | outcome=self.outcome.to(device), 83 | structural=self.structural.to(device)) 84 | 85 | 86 | class TestDataSetTorch(NamedTuple): 87 | treatment: torch.Tensor 88 | instrumental: torch.Tensor 89 | covariate: torch.Tensor 90 | outcome: torch.Tensor 91 | structural: torch.Tensor 92 | 93 | @classmethod 94 | def from_numpy(cls, test_data: TestDataSet): 95 | covariate = None 96 | instrumental = None 97 | outcome = None 98 | if test_data.covariate is not None: 99 | covariate = torch.tensor(test_data.covariate, dtype=torch.float32) 100 | if test_data.instrumental is not None: 101 | instrumental = torch.tensor(test_data.instrumental, dtype=torch.float32) 102 | if test_data.outcome is not None: 103 | outcome = torch.tensor(test_data.outcome, dtype=torch.float32) 104 | return TestDataSetTorch(treatment=torch.tensor(test_data.treatment, dtype=torch.float32), 105 | covariate=covariate, 106 | instrumental=instrumental, 107 | outcome=outcome, 108 | structural=torch.tensor(test_data.structural, dtype=torch.float32)) 109 | def to(self, device): 110 | covariate = None 111 | instrumental = None 112 | outcome = None 113 | if self.covariate is not None: 114 | covariate = self.covariate.to(device) 115 | if self.instrumental is not None: 116 | instrumental = self.instrumental.to(device) 117 | if self.outcome is not None: 118 | outcome = self.outcome.to(device) 119 | return TestDataSetTorch(treatment=self.treatment.to(device), 120 | covariate=covariate, 121 | instrumental=instrumental, 122 | outcome=outcome, 123 | structural=self.structural.to(device)) -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/model.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import torch 3 | from torch import nn 4 | import numpy as np 5 | import logging 6 | 7 | from .utils.pytorch_linear_reg_utils import fit_linear, linear_reg_pred, outer_prod, add_const_col 8 | from .dataclass import TrainDataSet, TestDataSet, TrainDataSetTorch, TestDataSetTorch 9 | 10 | logger = logging.getLogger() 11 | 12 | 13 | class DeepGMMModel: 14 | 15 | def __init__(self, 16 | primal_net: nn.Module, 17 | dual_net: nn.Module 18 | ): 19 | self.primal_net = primal_net 20 | self.dual_net = dual_net 21 | 22 | def predict_t(self, treatment: torch.Tensor): 
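        """Run the primal (response) network on a treatment tensor with the network in eval mode."""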
23 | self.primal_net.train(False) 24 | return self.primal_net(treatment) 25 | 26 | def predict(self, treatment: np.ndarray): 27 | treatment_t = torch.tensor(treatment, dtype=torch.float32) 28 | return self.predict_t(treatment_t).data.numpy() 29 | 30 | def evaluate_t(self, test_data: TestDataSetTorch): 31 | target = test_data.structural 32 | with torch.no_grad(): 33 | pred = self.predict_t(test_data.treatment) 34 | return (torch.norm((target - pred)) ** 2) / target.size()[0] 35 | 36 | def evaluate(self, test_data: TestDataSet): 37 | return self.evaluate_t(TestDataSetTorch.from_numpy(test_data)).data.item() 38 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torch.nn.utils import spectral_norm 7 | 8 | 9 | from .nn_structure_for_demand_old import build_net_for_demand_old 10 | from .nn_structure_for_sin import build_net_for_sin 11 | from .nn_structure_for_dsprite import build_net_for_dsprite 12 | from .nn_structure_for_demand_image import build_net_for_demand_image 13 | 14 | import logging 15 | 16 | logger = logging.getLogger() 17 | 18 | 19 | def build_extractor(data_name: str) -> Tuple[nn.Module, nn.Module]: 20 | if data_name == "demand_old": 21 | logger.info("build old model without image") 22 | return build_net_for_demand_old() 23 | elif data_name == "sin": 24 | return build_net_for_sin() 25 | elif data_name == "dsprite": 26 | return build_net_for_dsprite() 27 | elif data_name == "demand_image": 28 | return build_net_for_demand_image() 29 | else: 30 | raise ValueError(f"data name {data_name} is not valid") 31 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/nn_structure_for_demand_image.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from torch.nn.utils import spectral_norm 5 | 6 | 7 | class ImageFeature(nn.Module): 8 | 9 | def __init__(self, num_dense_feature: int): 10 | super(ImageFeature, self).__init__() 11 | self.num_dense_feature = num_dense_feature 12 | self.conv1 = spectral_norm(nn.Conv2d(1, 64, 3)) 13 | self.conv2 = spectral_norm(nn.Conv2d(64, 64, 3)) 14 | self.maxpool = nn.MaxPool2d(2) 15 | self.dropout1 = nn.Dropout(0.1) 16 | self.dropout2 = nn.Dropout(0.1) 17 | self.batch = nn.BatchNorm1d(9216) 18 | self.linear1 = nn.Linear(9216, 128) 19 | self.linear2 = nn.Linear(128, 64) 20 | 21 | def forward(self, data): 22 | dense = data[:, :self.num_dense_feature] 23 | image = data[:, self.num_dense_feature:] 24 | image = image.reshape((-1, 1, 28, 28)) 25 | image_feature = F.relu(self.conv1(image)) 26 | image_feature = self.maxpool(F.relu(self.conv2(image_feature))) 27 | image_feature = torch.flatten(image_feature, start_dim=1) 28 | image_feature = self.dropout1(image_feature) 29 | image_feature = self.dropout2(F.relu(self.linear1(image_feature))) 30 | image_feature = self.linear2(image_feature) 31 | return torch.cat([dense, image_feature], dim=1) 32 | 33 | 34 | class LimitCol(nn.Module): 35 | 36 | def __init__(self, ndim: int): 37 | super(LimitCol, self).__init__() 38 | self.ndim = ndim 39 | 40 | def forward(self, data): 41 | return data[:, :self.ndim] 42 | 43 | 44 | def 
build_net_for_demand_image(): 45 | 46 | dual_net = nn.Sequential(ImageFeature(2), 47 | nn.Linear(66, 32), 48 | nn.BatchNorm1d(32), 49 | nn.ReLU(), 50 | nn.Linear(32, 1)) 51 | 52 | primal_net = nn.Sequential(ImageFeature(2), 53 | nn.Linear(66, 32), 54 | nn.BatchNorm1d(32), 55 | nn.ReLU(), 56 | nn.Linear(32, 1)) 57 | 58 | return primal_net, dual_net 59 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/nn_structure_for_demand_old.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from typing import Tuple 4 | 5 | 6 | def build_net_for_demand_old() -> Tuple[nn.Module, nn.Module]: 7 | response_net = nn.Sequential(nn.Linear(3, 128), 8 | nn.ReLU(), 9 | nn.Linear(128, 64), 10 | nn.ReLU(), 11 | nn.Linear(64, 32), 12 | nn.Tanh(), 13 | nn.Linear(32, 1)) 14 | 15 | dual_net = nn.Sequential(nn.Linear(3, 128), 16 | nn.ReLU(), 17 | nn.Linear(128, 64), 18 | nn.ReLU(), 19 | nn.Linear(64, 1)) 20 | 21 | return response_net, dual_net 22 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/nn_structure_for_dsprite.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from torch.nn.utils import spectral_norm 5 | 6 | 7 | class View(nn.Module): 8 | def __init__(self, size): 9 | super(View, self).__init__() 10 | self.size = size 11 | 12 | def forward(self, tensor): 13 | return tensor.view(self.size) 14 | 15 | 16 | class ImageFeature(nn.Module): 17 | 18 | def __init__(self): 19 | super(ImageFeature, self).__init__() 20 | self.treatment_net = nn.Sequential( 21 | nn.Conv2d(1, 32, 4, 2, 1), # B, 32, 32, 32 22 | nn.ReLU(True), 23 | nn.Conv2d(32, 32, 4, 2, 1), # B, 32, 16, 16 24 | nn.ReLU(True), 25 | nn.Conv2d(32, 32, 4, 2, 1), # B, 32, 8, 8 26 | nn.ReLU(True), 27 | nn.Conv2d(32, 32, 4, 2, 1), # B, 32, 4, 4 28 | nn.ReLU(True), 29 | View((-1, 32 * 4 * 4)), # B, 512 30 | nn.BatchNorm1d(32 * 4 * 4), 31 | nn.Linear(32 * 4 * 4, 256), # B, 256 32 | nn.ReLU(True), 33 | nn.Linear(256, 128), # B, 256 34 | nn.ReLU(True), 35 | nn.Linear(128, 32), # B, z_dim*2 36 | nn.BatchNorm1d(32), 37 | nn.ReLU() 38 | ) 39 | 40 | def forward(self, data): 41 | image = data.reshape((-1, 1, 64, 64)) 42 | return self.treatment_net(image) 43 | 44 | 45 | def build_net_for_dsprite(): 46 | primal_net = nn.Sequential(spectral_norm(nn.Linear(64 * 64, 1024)), 47 | nn.ReLU(), 48 | spectral_norm(nn.Linear(1024, 512)), 49 | nn.ReLU(), 50 | nn.BatchNorm1d(512), 51 | spectral_norm(nn.Linear(512, 128)), 52 | nn.ReLU(), 53 | spectral_norm(nn.Linear(128, 32)), 54 | nn.BatchNorm1d(32), 55 | nn.Tanh(), 56 | nn.Linear(32, 1)) 57 | 58 | # treatment_net = ImageFeature() 59 | dual_net = nn.Sequential(spectral_norm(nn.Linear(3, 256)), 60 | nn.ReLU(), 61 | spectral_norm(nn.Linear(256, 128)), 62 | nn.ReLU(), 63 | nn.BatchNorm1d(128), 64 | spectral_norm(nn.Linear(128, 128)), 65 | nn.ReLU(), 66 | nn.BatchNorm1d(128), 67 | spectral_norm(nn.Linear(128, 32)), 68 | nn.BatchNorm1d(32), 69 | nn.ReLU(), 70 | nn.Linear(32, 1)) 71 | 72 | return primal_net, dual_net 73 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/nn_structure/nn_structure_for_sin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 
from typing import Tuple
4 | 
5 | 
6 | def build_net_for_sin() -> Tuple[nn.Module, nn.Module]:
7 |     response_net = nn.Sequential(nn.Linear(1, 20),
8 |                                  nn.LeakyReLU(),
9 |                                  nn.Linear(20, 3),
10 |                                  nn.LeakyReLU(),
11 |                                  nn.Linear(3, 1))
12 | 
13 |     dual_net = nn.Sequential(nn.Linear(2, 20),
14 |                              nn.LeakyReLU(),
15 |                              nn.Linear(20, 1))
16 | 
17 |     return response_net, dual_net
18 | 
--------------------------------------------------------------------------------
/mliv/inference/gmm/deepgmm_v1/trainer.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, Optional, List
2 | import torch
3 | from torch import nn
4 | import numpy as np
5 | 
6 | from .model import DeepGMMModel
7 | from .dataclass import TrainDataSet, TrainDataSetTorch, TestDataSetTorch, TestDataSet
8 | from mliv.utils import set_seed, cat
9 | 
10 | example = '''
11 | from mliv.inference import DeepGMM
12 | 
13 | model = DeepGMM()
14 | model.fit(data)
15 | ITE = model.predict(data.train)
16 | ATE,_ = model.ATE(data.train)
17 | '''
18 | 
19 | def build_net_for_demand(z_dim, x_dim, t_dim):
20 |     response_net = nn.Sequential(nn.Linear(t_dim + x_dim, 128),  # the response (primal) net consumes cat([t, x])
21 |                                  nn.ReLU(),
22 |                                  nn.Linear(128, 64),
23 |                                  nn.ReLU(),
24 |                                  nn.Linear(64, 32),
25 |                                  nn.Tanh(),
26 |                                  nn.Linear(32, 1))
27 | 
28 |     dual_net = nn.Sequential(nn.Linear(z_dim + x_dim, 128),  # the dual net consumes cat([z, x])
29 |                              nn.ReLU(),
30 |                              nn.Linear(128, 64),
31 |                              nn.ReLU(),
32 |                              nn.Linear(64, 1))
33 | 
34 |     return response_net, dual_net
35 | 
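A note on what the trainer below optimizes: DeepGMM plays a zero-sum game between the response network g and the dual network f. With residual eps = y - g(t), `dual_update` ascends E[f(z)*eps] - 0.25*E[(f(z)*eps)^2] in f while g is frozen, and `primal_update` descends E[f(z)*eps] in g while f is frozen. A minimal sketch of one such round on toy tensors; the linear nets and the synthetic z, t, y are illustrative assumptions, not part of the package, while the learning rates and betas match the optimizers constructed below:

import torch
from torch import nn

torch.manual_seed(0)
z = torch.randn(256, 1)                  # toy instrument
t = z + 0.5 * torch.randn(256, 1)        # toy treatment, correlated with z
y = 2.0 * t + torch.randn(256, 1)        # toy outcome

g_net = nn.Linear(1, 1)                  # stands in for the response (primal) net
f_net = nn.Linear(1, 1)                  # stands in for the dual net
g_opt = torch.optim.Adam(g_net.parameters(), lr=0.0005, betas=(0.5, 0.9))
f_opt = torch.optim.Adam(f_net.parameters(), lr=0.0025, betas=(0.5, 0.9))

# dual step: maximize the regularized moment, residuals held fixed
with torch.no_grad():
    eps = y - g_net(t)
f_opt.zero_grad()
moment = torch.mean(f_net(z) * eps)
reg = 0.25 * torch.mean((f_net(z) * eps) ** 2)
(-moment + reg).backward()
f_opt.step()

# primal step: minimize the moment over g, dual values held fixed
with torch.no_grad():
    dual = f_net(z)
g_opt.zero_grad()
torch.mean(dual * (y - g_net(t))).backward()
g_opt.step()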
36 | class DeepGMMTrainer(object):
37 | 
38 |     def __init__(self, data_list: List, net_list: List, train_params: Dict[str, Any],
39 |                  device: str = 'cpu'):
40 |         self.data_list = data_list
41 |         self.device = device if torch.cuda.is_available() else 'cpu'
42 | 
43 |         # configure training params
44 |         self.dual_iter: int = train_params["dual_iter"]
45 |         self.primal_iter: int = train_params["primal_iter"]
46 |         self.epochs: int = train_params["epochs"]
47 | 
48 |         # build networks
49 |         networks = net_list
50 |         self.primal_net: nn.Module = networks[0]
51 |         self.dual_net: nn.Module = networks[1]
52 |         self.primal_weight_decay = train_params["primal_weight_decay"]
53 |         self.dual_weight_decay = train_params["dual_weight_decay"]
54 | 
55 |         self.primal_net.to(self.device)
56 |         self.dual_net.to(self.device)
57 | 
58 |         self.primal_opt = torch.optim.Adam(self.primal_net.parameters(),
59 |                                            weight_decay=self.primal_weight_decay,
60 |                                            lr=0.0005, betas=(0.5, 0.9))
61 |         self.dual_opt = torch.optim.Adam(self.dual_net.parameters(),
62 |                                          weight_decay=self.dual_weight_decay,
63 |                                          lr=0.0025, betas=(0.5, 0.9))
64 | 
65 |         # build monitor
66 |         self.monitor = None
67 | 
68 |     def train(self, rand_seed: int = 42, verbose: int = 0, epoch_show: int = 20) -> float:
69 |         """
70 | 
71 |         Parameters
72 |         ----------
73 |         rand_seed: int
74 |             random seed
75 |         verbose : int
76 |             Determines the level of logging.
77 |         Returns
78 |         -------
79 |         oos_result : float
80 |             Out-of-sample loss of the trained model.
81 |         """
82 |         train_data = self.data_list[0]
83 |         test_data = self.data_list[2]
84 |         if train_data.covariate is not None:
85 |             train_data = TrainDataSet(treatment=np.concatenate([train_data.treatment, train_data.covariate], axis=1),
86 |                                       structural=train_data.structural,
87 |                                       covariate=None,
88 |                                       instrumental=train_data.instrumental,
89 |                                       outcome=train_data.outcome)
90 |             test_data = TestDataSet(treatment=np.concatenate([test_data.treatment, test_data.covariate], axis=1),
91 |                                     covariate=None,
92 |                                     structural=test_data.structural)
93 | 
94 |         train_data_t = TrainDataSetTorch.from_numpy(train_data)
95 |         test_data_t = TestDataSetTorch.from_numpy(test_data)
96 | 
97 |         train_data_t = train_data_t.to(self.device)
98 |         test_data_t = test_data_t.to(self.device)
99 | 
100 |         for t in range(self.epochs):
101 |             self.dual_update(train_data_t, verbose)
102 |             self.primal_update(train_data_t, verbose)
103 |             if t % epoch_show == 0 or t == self.epochs - 1:
104 |                 print(f"Epoch {t} ended")
105 |             if verbose >= 1:
106 |                 mdl = DeepGMMModel(self.primal_net, self.dual_net)
107 |                 print(f"test error {mdl.evaluate_t(test_data_t).data.item()}")
108 | 
109 | 
110 |         mdl = DeepGMMModel(self.primal_net, self.dual_net)
111 |         oos_loss: float = mdl.evaluate_t(test_data_t).data.item()
112 |         print(f"test_loss:{oos_loss}")
113 |         return oos_loss
114 | 
115 |     def dual_update(self, train_data_t: TrainDataSetTorch, verbose: int):
116 |         self.dual_net.train(True)
117 |         self.primal_net.train(False)
118 |         with torch.no_grad():
119 |             epsilon = train_data_t.outcome - self.primal_net(train_data_t.treatment)
120 |         for t in range(self.dual_iter):
121 |             self.dual_opt.zero_grad()
122 |             moment = torch.mean(self.dual_net(train_data_t.instrumental) * epsilon)
123 |             reg = 0.25 * torch.mean((self.dual_net(train_data_t.instrumental) * epsilon) ** 2)
124 |             loss = -moment + reg
125 |             if verbose >= 2:
126 |                 print(f"dual loss:{loss.data.item()}")
127 |             loss.backward()
128 |             self.dual_opt.step()
129 | 
130 |     def primal_update(self, train_data_t: TrainDataSetTorch, verbose: int):
131 |         self.dual_net.train(False)
132 |         self.primal_net.train(True)
133 |         with torch.no_grad():
134 |             dual = self.dual_net(train_data_t.instrumental)
135 |         for t in range(self.primal_iter):
136 |             self.primal_opt.zero_grad()
137 |             epsilon = train_data_t.outcome - self.primal_net(train_data_t.treatment)
138 |             loss = torch.mean(dual * epsilon)
139 |             if verbose >= 2:
140 |                 print(f"primal loss:{loss.data.item()}")
141 |             loss.backward()
142 |             self.primal_opt.step()
143 | 
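Before the `DeepGMM` wrapper class packages these steps, a minimal sketch of driving `DeepGMMTrainer` directly, assuming the definitions in this file are in scope; the synthetic arrays and the choice x_dim=0 are illustrative assumptions, and the middle slot of data_list (a validation split that `train` does not consume) simply reuses the train split:

import numpy as np

n = 1000
z = np.random.randn(n, 2)                      # instruments
t = z[:, :1] + 0.5 * np.random.randn(n, 1)     # treatment
g = 2.0 * t                                    # true structural values
y = g + np.random.randn(n, 1)                  # observed outcome

train_set = TrainDataSet(treatment=t, instrumental=z, covariate=None, outcome=y, structural=g)
test_set = TestDataSet(treatment=t, covariate=None, structural=g, instrumental=None, outcome=None)

nets = build_net_for_demand(z_dim=2, x_dim=0, t_dim=1)
params = {"primal_iter": 1, "dual_iter": 5, "epochs": 100,
          "primal_weight_decay": 0.0, "dual_weight_decay": 0.0}
trainer = DeepGMMTrainer([train_set, train_set, test_set], list(nets), params, device='cpu')
oos_loss = trainer.train(verbose=0)            # mean squared error against the structural values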
144 | class DeepGMM(object):
145 |     def __init__(self) -> None:
146 |         self.config = {
147 |             'methodName': 'DeepGMM',
148 |             'resultDir': './Results/tmp/',
149 |             "primal_iter": 1,
150 |             "dual_iter": 5,
151 |             "epochs": 300,
152 |             "primal_weight_decay": 0.0,
153 |             "dual_weight_decay": 0.0,
154 |             'device': 'cuda:0',
155 |             'verbose': 1,
156 |             'epoch_show': 50,
157 |             'seed': 2022,
158 |         }
159 | 
160 |     def set_Configuration(self, config):
161 |         self.config = config
162 | 
163 |     def fit(self, data, exp=-1, config=None):
164 |         if config is None:
165 |             config = self.config
166 | 
167 |         set_seed(config['seed'])
168 |         data.numpy()
169 | 
170 |         self.z_dim = data.train.z.shape[1]
171 |         self.x_dim = data.train.x.shape[1]
172 |         self.t_dim = data.train.t.shape[1]
173 | 
174 |         response_net, dual_net = build_net_for_demand(self.z_dim, self.x_dim, self.t_dim)
175 |         net_list = [response_net, dual_net]
176 | 
177 |         train_data = TrainDataSet(treatment=np.concatenate([data.train.t, data.train.x],1),
178 |                                   instrumental=np.concatenate([data.train.z, data.train.x],1),
179 |                                   covariate=None,
180 |                                   outcome=data.train.y,
181 |                                   structural=data.train.g)
182 | 
183 |         val_data = TrainDataSet(treatment=np.concatenate([data.valid.t, data.valid.x],1),
184 |                                 instrumental=np.concatenate([data.valid.z, data.valid.x],1),
185 |                                 covariate=None,
186 |                                 outcome=data.valid.y,
187 |                                 structural=data.valid.g)
188 | 
189 |         test_data = TestDataSet(treatment=np.concatenate([data.test.t, data.test.x],1),
190 |                                 instrumental=np.concatenate([data.test.z, data.test.x],1),
191 |                                 covariate=None,
192 |                                 outcome=None,
193 |                                 structural=data.test.g)
194 | 
195 |         data_list = [train_data, val_data, test_data]
196 | 
197 |         train_config = {"primal_iter": config['primal_iter'],
198 |                         "dual_iter": config['dual_iter'],
199 |                         "epochs": config['epochs'],
200 |                         "primal_weight_decay": config['primal_weight_decay'],
201 |                         "dual_weight_decay": config['dual_weight_decay'],
202 |                         }
203 |         device = config['device']
204 | 
205 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
206 | 
207 |         trainer = DeepGMMTrainer(data_list, net_list, train_config, device)
208 |         test_loss = trainer.train(rand_seed=config['seed'], verbose=config['verbose'], epoch_show=config['epoch_show'])
209 | 
210 |         def estimation(data):
211 |             input0 = torch.Tensor(np.concatenate([data.t-data.t, data.x],1)).to(self.device)
212 |             point0 = response_net(input0).detach().cpu().numpy()
213 | 
214 |             inputt = torch.Tensor(np.concatenate([data.t, data.x],1)).to(self.device)
215 |             pointt = response_net(inputt).detach().cpu().numpy()
216 | 
217 |             return point0, pointt
218 | 
219 |         print('End. ' + '-'*20)
220 | 
221 |         self.estimation = estimation
222 |         self.response_net = response_net
223 |         self.dual_net = dual_net
224 |         self.device = device
225 |         self.data = data
226 | 
227 |     def predict(self, data=None, t=None, x=None):
228 |         if data is None:
229 |             data = self.data.test
230 | 
231 |         if x is None:
232 |             x = data.x
233 | 
234 |         if t is None:
235 |             t = data.t
236 | 
237 |         with torch.no_grad():
238 |             input = torch.Tensor(np.concatenate([t,x],1)).to(self.device)
239 |             pred = self.response_net(input).detach().cpu().numpy()
240 | 
241 |         return pred
242 | 
243 |     def ITE(self, data=None, t=None, x=None):
244 |         if data is None:
245 |             data = self.data.test
246 | 
247 |         if x is None:
248 |             x = data.x
249 | 
250 |         if t is None:
251 |             t = data.t
252 | 
253 |         input_0 = torch.Tensor(np.concatenate([t-t,x],1)).to(self.device)
254 |         input_1 = torch.Tensor(np.concatenate([t-t+1,x],1)).to(self.device)
255 |         input_t = torch.Tensor(np.concatenate([t,x],1)).to(self.device)
256 | 
257 |         ITE_0 = self.response_net(input_0).detach().cpu().numpy()
258 |         ITE_1 = self.response_net(input_1).detach().cpu().numpy()
259 |         ITE_t = self.response_net(input_t).detach().cpu().numpy()
260 | 
261 |         return ITE_0,ITE_1,ITE_t
262 | 
263 |     def ATE(self, data=None, t=None, x=None):
264 |         ITE_0,ITE_1,ITE_t = self.ITE(data,t,x)
265 | 
266 |         return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0)
--------------------------------------------------------------------------------
/mliv/inference/gmm/deepgmm_v1/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, Iterator, Tuple
2 | from itertools import product
3 | 
4 | 
5 | def grid_search_dict(org_params: Dict[str, Any]) -> Iterator[Tuple[str, Dict[str, Any]]]:
6 |     """
7 |     Iterate over list-valued entries in a dict to do grid search.
8 | 9 | Examples 10 | -------- 11 | >>> test_dict = dict(a=[1,2], b = [1,2,3], c = 4) 12 | >>> list(grid_search_dict(test_dict)) 13 | [('a:1-b:1', {'c': 4, 'a': 1, 'b': 1}), 14 | ('a:1-b:2', {'c': 4, 'a': 1, 'b': 2}), 15 | ('a:1-b:3', {'c': 4, 'a': 1, 'b': 3}), 16 | ('a:2-b:1', {'c': 4, 'a': 2, 'b': 1}), 17 | ('a:2-b:2', {'c': 4, 'a': 2, 'b': 2}), 18 | ('a:2-b:3', {'c': 4, 'a': 2, 'b': 3})] 19 | >>> test_dict = dict(a=1, b = 2, c = 3) 20 | >>> list(grid_search_dict(test_dict)) 21 | [('one', {'a': 1, 'b': 2, 'c': 3})] 22 | 23 | Parameters 24 | ---------- 25 | org_params : Dict 26 | Dictionary to be grid searched 27 | 28 | Yields 29 | ------ 30 | name : str 31 | Name that describes the parameter of the grid 32 | param: Dict[str, Any] 33 | Dictionary that contains the parameter at grid 34 | 35 | """ 36 | search_keys = [] 37 | non_search_keys = [] 38 | for key in org_params.keys(): 39 | if isinstance(org_params[key], list): 40 | search_keys.append(key) 41 | else: 42 | non_search_keys.append(key) 43 | if len(search_keys) == 0: 44 | yield "one", org_params 45 | else: 46 | param_generator = product(*[org_params[key] for key in search_keys]) 47 | for one_param_set in param_generator: 48 | one_dict = {k: org_params[k] for k in non_search_keys} 49 | tmp = dict(list(zip(search_keys, one_param_set))) 50 | one_dict.update(tmp) 51 | one_name = "-".join([k + ":" + str(tmp[k]) for k in search_keys]) 52 | yield one_name, one_dict 53 | -------------------------------------------------------------------------------- /mliv/inference/gmm/deepgmm_v1/utils/custom_logging.py: -------------------------------------------------------------------------------- 1 | import json 2 | import shutil 3 | from typing import Union 4 | from pathlib import Path, PosixPath 5 | import requests 6 | import logging 7 | 8 | 9 | LOG_FORMAT = logging.Formatter( 10 | '%(name)s: %(asctime)s,%(msecs)d %(levelname)-4s [%(filename)s:%(lineno)d] %(message)s') 11 | 12 | logger = logging.getLogger() 13 | 14 | 15 | class SlackLoggingHandler(logging.StreamHandler): 16 | def __init__(self, webhook_url, stream=None): 17 | super(SlackLoggingHandler, self).__init__(stream) 18 | self.url = webhook_url 19 | 20 | def emit(self, record): 21 | message = super(SlackLoggingHandler, self).format(record) 22 | requests.post(self.url, json.dumps({'text': message})) 23 | 24 | 25 | def configure_logger(logger_name: str, 26 | log_format: str = LOG_FORMAT, 27 | log_dir: Union[str, Path, PosixPath, None] = None, 28 | webhook_url: Union[str, None] = None): 29 | # get root logger 30 | logger = logging.getLogger() 31 | logger.name = logger_name 32 | 33 | # slack post 34 | if webhook_url is not None: 35 | slack_handler = SlackLoggingHandler(webhook_url) 36 | slack_handler.setLevel(logging.ERROR) 37 | slack_handler.setFormatter(log_format) 38 | logger.addHandler(slack_handler) 39 | 40 | if log_dir is not None: 41 | log_dir = Path(log_dir) 42 | if log_dir.exists(): 43 | shutil.rmtree(log_dir) 44 | log_dir.mkdir(parents=True) 45 | log_filename = str(log_dir.joinpath('text_log.txt')) 46 | file_handler = logging.FileHandler(log_filename) 47 | file_handler.setLevel(logging.INFO) 48 | file_handler.setFormatter(log_format) 49 | logger.addHandler(file_handler) 50 | 51 | # stdout 52 | stream_handler = logging.StreamHandler() 53 | stream_handler.setLevel(logging.INFO) 54 | stream_handler.setFormatter(log_format) 55 | logger.addHandler(stream_handler) 56 | 57 | logger.setLevel(logging.INFO) 58 | -------------------------------------------------------------------------------- 
/mliv/inference/gmm/deepgmm_v1/utils/pytorch_linear_reg_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def fit_linear(target: torch.Tensor, 5 | feature: torch.Tensor, 6 | reg: float = 0.0): 7 | """ 8 | Parameters 9 | ---------- 10 | target: torch.Tensor[nBatch, dim1, dim2, ...] 11 | feature: torch.Tensor[nBatch, feature_dim] 12 | reg: float 13 | value of l2 regularizer 14 | Returns 15 | ------- 16 | weight: torch.Tensor[feature_dim, dim1, dim2, ...] 17 | weight of ridge linear regression. weight.size()[0] = feature_dim+1 if add_intercept is true 18 | """ 19 | assert feature.dim() == 2 20 | assert target.dim() >= 2 21 | nData, nDim = feature.size() 22 | A = torch.matmul(feature.t(), feature) 23 | device = feature.device 24 | A = A + reg * torch.eye(nDim, device=device) 25 | # U = torch.cholesky(A) 26 | # A_inv = torch.cholesky_inverse(U) 27 | #TODO use cholesky version in the latest pytorch 28 | A_inv = torch.inverse(A) 29 | if target.dim() == 2: 30 | b = torch.matmul(feature.t(), target) 31 | weight = torch.matmul(A_inv, b) 32 | else: 33 | b = torch.einsum("nd,n...->d...", feature, target) 34 | weight = torch.einsum("de,d...->e...", A_inv, b) 35 | return weight 36 | 37 | 38 | def linear_reg_pred(feature: torch.Tensor, weight: torch.Tensor): 39 | assert weight.dim() >= 2 40 | if weight.dim() == 2: 41 | return torch.matmul(feature, weight) 42 | else: 43 | return torch.einsum("nd,d...->n...", feature, weight) 44 | 45 | 46 | def linear_reg_loss(target: torch.Tensor, 47 | feature: torch.Tensor, 48 | reg: float): 49 | weight = fit_linear(target, feature, reg) 50 | pred = linear_reg_pred(feature, weight) 51 | return torch.norm((target - pred)) ** 2 + reg * torch.norm(weight) ** 2 52 | 53 | 54 | def outer_prod(mat1: torch.Tensor, mat2: torch.Tensor): 55 | """ 56 | Parameters 57 | ---------- 58 | mat1: torch.Tensor[nBatch, mat1_dim1, mat1_dim2, mat1_dim3, ...] 59 | mat2: torch.Tensor[nBatch, mat2_dim1, mat2_dim2, mat2_dim3, ...] 60 | 61 | Returns 62 | ------- 63 | res : torch.Tensor[nBatch, mat1_dim1, ..., mat2_dim1, ...] 64 | """ 65 | 66 | mat1_shape = tuple(mat1.size()) 67 | mat2_shape = tuple(mat2.size()) 68 | assert mat1_shape[0] == mat2_shape[0] 69 | nData = mat1_shape[0] 70 | aug_mat1_shape = mat1_shape + (1,) * (len(mat2_shape) - 1) 71 | aug_mat1 = torch.reshape(mat1, aug_mat1_shape) 72 | aug_mat2_shape = (nData,) + (1,) * (len(mat1_shape) - 1) + mat2_shape[1:] 73 | aug_mat2 = torch.reshape(mat2, aug_mat2_shape) 74 | return aug_mat1 * aug_mat2 75 | 76 | 77 | def add_const_col(mat: torch.Tensor): 78 | """ 79 | 80 | Parameters 81 | ---------- 82 | mat : torch.Tensor[n_data, n_col] 83 | 84 | Returns 85 | ------- 86 | res : torch.Tensor[n_data, n_col+1] 87 | add one column only contains 1. 
88 | 89 | """ 90 | assert mat.dim() == 2 91 | n_data = mat.size()[0] 92 | device = mat.device 93 | return torch.cat([mat, torch.ones((n_data, 1), device=device)], dim=1) 94 | -------------------------------------------------------------------------------- /mliv/inference/onestage/__init__.py: -------------------------------------------------------------------------------- 1 | from .onesiv_v1 import OneSIV -------------------------------------------------------------------------------- /mliv/inference/onestage/onesiv_v1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | from torch.utils.data import DataLoader 5 | from mliv.utils import set_seed, cat 6 | 7 | example = ''' 8 | from mliv.inference import OneSIV 9 | 10 | model = OneSIV() 11 | model.fit(data) 12 | ITE = model.predict(data.train) 13 | ATE,_ = model.ATE(data.train) 14 | ''' 15 | 16 | class Networks(nn.Module): 17 | def __init__(self, z_dim, x_dim, t_dim, dropout): 18 | super(Networks, self).__init__() 19 | 20 | t_input_dim, y_input_dim = z_dim+x_dim, t_dim+x_dim 21 | 22 | self.t_net = nn.Sequential(nn.Linear(t_input_dim, 1280), 23 | nn.ReLU(), 24 | nn.Dropout(dropout), 25 | nn.Linear(1280, 320), 26 | nn.ReLU(), 27 | nn.Dropout(dropout), 28 | nn.Linear(320, 32), 29 | nn.ReLU(), 30 | nn.Dropout(dropout), 31 | nn.Linear(32, t_dim)) 32 | 33 | self.y_net = nn.Sequential(nn.Linear(y_input_dim, 1280), 34 | nn.ReLU(), 35 | nn.Dropout(dropout), 36 | nn.Linear(1280, 320), 37 | nn.ReLU(), 38 | nn.Dropout(dropout), 39 | nn.Linear(320, 32), 40 | nn.ReLU(), 41 | nn.Dropout(dropout), 42 | nn.Linear(32, 1)) 43 | 44 | def forward(self, z, x): 45 | pred_t = self.t_net(cat([z,x])) 46 | yt_input = torch.cat((pred_t,x), 1) 47 | pred_yt = self.y_net(yt_input) 48 | 49 | return pred_t, pred_yt 50 | 51 | class OneSIV(object): 52 | def __init__(self) -> None: 53 | self.config = { 54 | 'methodName': 'OneSIV', 55 | 'device': 'cuda:0', 56 | 'learning_rate': 0.005, 57 | 'dropout': 0.5, 58 | 'beta1': 0.9, 59 | 'beta2': 0.999, 60 | 'eps': 1e-8, 61 | 'w1': 0.0017, 62 | 'w2': 1.0, 63 | 'epochs': 30, 64 | 'verbose': 1, 65 | 'show_per_epoch': 10, 66 | 'batch_size':1000, 67 | 'seed': 2022, 68 | } 69 | 70 | def set_Configuration(self, config): 71 | self.config = config 72 | 73 | def get_loader(self, data=None): 74 | if data is None: 75 | data = self.train 76 | loader = DataLoader(data, batch_size=self.batch_size) 77 | return loader 78 | 79 | def fit(self, data, exp=-1, config=None): 80 | if config is None: 81 | config = self.config 82 | 83 | self.z_dim = data.train.z.shape[1] 84 | self.x_dim = data.train.x.shape[1] 85 | self.t_dim = data.train.t.shape[1] 86 | 87 | self.device = config['device'] 88 | self.batch_size = config['batch_size'] 89 | 90 | set_seed(config['seed']) 91 | data.tensor() 92 | data.to(self.device) 93 | self.data = data 94 | 95 | OneSIV_dict = { 96 | 'z_dim':self.z_dim, 97 | 'x_dim':self.x_dim, 98 | 't_dim':self.t_dim, 99 | 'dropout':config['dropout'], 100 | } 101 | 102 | net = Networks(**OneSIV_dict) 103 | net.to(self.device) 104 | 105 | optimizer = torch.optim.Adam(net.parameters(), lr=config['learning_rate'], betas=(config['beta1'], config['beta2']),eps=config['eps']) 106 | t_loss = torch.nn.MSELoss() 107 | y_loss = torch.nn.MSELoss() 108 | 109 | print('Run {}-th experiment for {}. 
'.format(exp, config['methodName'])) 110 | 111 | train_loader = self.get_loader(data.train) 112 | 113 | def estimation(data): 114 | net.eval() 115 | return net.y_net(cat([data.t-data.t, data.x])), net.y_net(cat([data.t, data.x])) 116 | 117 | for epoch in range(config['epochs']): 118 | net.train() 119 | 120 | for idx, inputs in enumerate(train_loader): 121 | z = inputs['z'].to(self.device) 122 | x = inputs['x'].to(self.device) 123 | t = inputs['t'].to(self.device) 124 | y = inputs['y'].to(self.device) 125 | 126 | pred_t, pred_y = net(z,x) 127 | loss = config['w1'] * y_loss(pred_y, y) + config['w2'] * t_loss(pred_t, t) 128 | 129 | optimizer.zero_grad() 130 | loss.backward() 131 | optimizer.step() 132 | 133 | net.eval() 134 | if (config['verbose'] >= 1 and epoch % config['show_per_epoch'] == 0 ) or epoch == config['epochs']-1: 135 | _, pred_test_y = estimation(data.test) 136 | print(f'Epoch {epoch}: {y_loss(pred_test_y, data.test.y)}. ') 137 | 138 | print('End. ' + '-'*20) 139 | 140 | self.estimation = estimation 141 | self.y_net = net.y_net 142 | self.t_net = net.t_net 143 | 144 | def predict(self, data=None, t=None, x=None): 145 | if data is None: 146 | data = self.data.test 147 | 148 | if x is None: 149 | x = data.x 150 | 151 | if t is None: 152 | t = data.t 153 | 154 | return self.y_net(cat([t,x])).detach().cpu().numpy() 155 | 156 | def ITE(self, data=None, t=None, x=None): 157 | if data is None: 158 | data = self.data.test 159 | 160 | if x is None: 161 | x = data.x 162 | 163 | if t is None: 164 | t = data.t 165 | 166 | ITE_0 = self.y_net(cat([t-t,x])).detach().cpu().numpy() 167 | ITE_1 = self.y_net(cat([t-t+1,x])).detach().cpu().numpy() 168 | ITE_t = self.y_net(cat([t,x])).detach().cpu().numpy() 169 | 170 | return ITE_0,ITE_1,ITE_t 171 | 172 | def ATE(self, data=None, t=None, x=None): 173 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 174 | 175 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 176 | 177 | -------------------------------------------------------------------------------- /mliv/inference/sieve/__init__.py: -------------------------------------------------------------------------------- 1 | from .kerneliv_v1 import KernelIV 2 | from .dualiv_v1 import DualIV -------------------------------------------------------------------------------- /mliv/inference/sieve/dualiv_v1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import csv 3 | import cvxopt 4 | from mliv.utils import set_seed, cat 5 | 6 | example = ''' 7 | from mliv.inference import DualIV 8 | 9 | model = DualIV() 10 | model.fit(data) 11 | ITE = model.predict(data.train) 12 | ATE,_ = model.ATE(data.train) 13 | ''' 14 | 15 | def quadprog(H, f, L=None, k=None, Aeq=None, beq=None, lb=None, ub=None): 16 | """ 17 | Input: Numpy arrays, the format follows MATLAB quadprog function: https://www.mathworks.com/help/optim/ug/quadprog.html 18 | Output: Numpy array of the solution 19 | """ 20 | n_var = H.shape[1] 21 | 22 | P = cvxopt.matrix(H, tc='d') 23 | q = cvxopt.matrix(f, tc='d') 24 | 25 | if L is not None or k is not None: 26 | assert(k is not None and L is not None) 27 | if lb is not None: 28 | L = np.vstack([L, -np.eye(n_var)]) 29 | k = np.vstack([k, -lb]) 30 | 31 | if ub is not None: 32 | L = np.vstack([L, np.eye(n_var)]) 33 | k = np.vstack([k, ub]) 34 | 35 | L = cvxopt.matrix(L, tc='d') 36 | k = cvxopt.matrix(k, tc='d') 37 | 38 | if Aeq is not None or beq is not None: 39 | assert(Aeq is not None and beq is not None) 40 | Aeq = cvxopt.matrix(Aeq, tc='d') 41 | beq = 
cvxopt.matrix(beq, tc='d') 42 | 43 | sol = cvxopt.solvers.qp(P, q, L, k, Aeq, beq) 44 | 45 | return np.array(sol['x']) 46 | def median_inter(z): 47 | n = len(z) 48 | z = z.reshape(n, -1) 49 | A = np.repeat(z, repeats=n, axis=1) 50 | B = A.T 51 | dist = np.abs(A-B).reshape(-1,1) 52 | vz=np.median(dist) 53 | return vz 54 | 55 | def get_K_entry(x,z,v): 56 | return np.exp((np.linalg.norm(x-z)**2) / (-2 * (v **2))) 57 | 58 | def get_K_matrix(X1,X2,v): 59 | M = len(X1) 60 | N = len(X2) 61 | K_true = np.zeros((M,N)) 62 | 63 | for i in range(M): 64 | for j in range(N): 65 | K_true[i,j] = get_K_entry(X1[i:i+1,:].T, X2[j:j+1,:].T, v) 66 | return K_true 67 | 68 | def get_K_entry_2d(x,z,Vmat): 69 | return np.exp((x-z).T @ Vmat @ (x-z) /2) 70 | 71 | def get_K_matrix_2d(X1,X2,Vmat): 72 | M = len(X1) 73 | N = len(X2) 74 | K_true = np.zeros((M,N)) 75 | Vmat = np.linalg.inv(Vmat) 76 | 77 | for i in range(M): 78 | for j in range(N): 79 | K_true[i,j] = get_K_entry_2d(X1[i:i+1,:].T, X2[j:j+1,:].T, Vmat) 80 | 81 | return K_true 82 | 83 | def DualIV_trainer(x, y, z): 84 | N, x_dim = x.shape 85 | 86 | vx = [median_inter(x[:,i]) for i in range(x_dim)] 87 | vz = median_inter(z) 88 | vy = median_inter(y) 89 | 90 | K_xx = 1 91 | for i in range(x_dim): 92 | K_xx = K_xx * get_K_matrix(x[:,i:i+1], x[:,i:i+1], vx[i]) 93 | K_zz = get_K_matrix(z, z, vz) 94 | K_yy = get_K_matrix(y, y, vy) 95 | 96 | K = K_xx 97 | 98 | yz = np.concatenate([y,z],-1) 99 | vyz = 90000 100 | Vmat = np.array([[vy, vyz], [vyz, vz]]) 101 | L_yzyz = get_K_matrix_2d(yz, yz, Vmat) 102 | 103 | L = L_yzyz 104 | 105 | lambda1 = 0.001 106 | gamma = N * np.linalg.norm(L @ L, 2) / np.linalg.norm(K @ L, 2) ** 2 107 | A = L @ L + 1 / N * gamma * L @ (K @ K) @ L + lambda1 * np.eye(N) 108 | Ainv = np.linalg.inv(A) 109 | 110 | lambda2 = 0.001 111 | Q = 2 * K.T @ L.T @ Ainv @ L @ K + lambda2 * np.eye(N) 112 | R = - 2 * K.T @ L.T @ Ainv @ L @ y 113 | 114 | beta = quadprog(Q,R) 115 | 116 | return beta, vx 117 | 118 | class DualIV(object): 119 | def __init__(self) -> None: 120 | self.config = { 121 | 'methodName': 'DualIV', 122 | 'num': -1, 123 | 'seed': 2022, 124 | } 125 | 126 | def set_Configuration(self, config): 127 | self.config = config 128 | 129 | def fit(self, data, exp=-1, config=None): 130 | if config is None: 131 | config = self.config 132 | 133 | set_seed(config['seed']) 134 | data.numpy() 135 | 136 | num = config['num'] 137 | num = num if num > 0 else data.train.length 138 | 139 | x4train = cat([data.train.t[:num], data.train.x[:num]]) 140 | y4train = data.train.y[:num] 141 | z4train = data.train.z[:num] 142 | 143 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 144 | 145 | beta, vx = DualIV_trainer(x4train, y4train, z4train) 146 | 147 | def estimation(data): 148 | return backResult(x4train, cat([data.t-data.t, data.x]), beta, vx), backResult(x4train, cat([data.t, data.x]), beta, vx) 149 | 150 | print('End. 
' + '-'*20)
151 |         self.data = data
152 |         self.x4train = x4train
153 |         self.beta = beta
154 |         self.vx = vx
155 |         self.estimation = estimation
156 | 
157 |     def predict(self, data=None, t=None, x=None):
158 |         if data is None:
159 |             data = self.data.test
160 | 
161 |         if x is None:
162 |             x = data.x
163 | 
164 |         if t is None:
165 |             t = data.t
166 | 
167 |         return backResult(self.x4train, cat([t, x]), self.beta, self.vx)
168 | 
169 |     def ITE(self, data=None, t=None, x=None):
170 |         if data is None:
171 |             data = self.data.test
172 | 
173 |         if x is None:
174 |             x = data.x
175 | 
176 |         if t is None:
177 |             t = data.t
178 | 
179 |         ITE_0 = backResult(self.x4train, cat([t-t, x]), self.beta, self.vx)
180 |         ITE_1 = backResult(self.x4train, cat([t-t+1, x]), self.beta, self.vx)
181 |         ITE_t = backResult(self.x4train, cat([t, x]), self.beta, self.vx)
182 | 
183 |         return ITE_0,ITE_1,ITE_t
184 | 
185 |     def ATE(self, data=None, t=None, x=None):
186 |         ITE_0,ITE_1,ITE_t = self.ITE(data,t,x)
187 | 
188 |         return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0)
189 | 
190 | def backResult(x, x_vis, beta, vx):
191 |     x_dim = x.shape[1]
192 |     K_Xtest = 1
193 |     for i in range(x_dim):
194 |         K_Xtest = K_Xtest * get_K_matrix(x[:,i:i+1], x_vis[:,i:i+1], vx[i])
195 | 
196 |     y_vis_dual = K_Xtest.T @ beta
197 |     return y_vis_dual
198 | 
199 | 
--------------------------------------------------------------------------------
/mliv/inference/sieve/kerneliv_v1.py:
--------------------------------------------------------------------------------
1 | from mliv.utils import set_seed, cat
2 | from typing import NamedTuple, Dict, Any, Optional, List
3 | import numpy as np
4 | import torch
5 | from scipy.spatial.distance import cdist
6 | from sklearn.model_selection import train_test_split
7 | 
8 | example = '''
9 | from mliv.inference import KernelIV
10 | 
11 | model = KernelIV()
12 | model.fit(data)
13 | ITE = model.predict(data.train)
14 | ATE,_ = model.ATE(data.train)
15 | '''
16 | 
17 | ############################################## data_class.py #######################
18 | class TrainDataSet(NamedTuple):
19 |     treatment: np.ndarray
20 |     instrumental: np.ndarray
21 |     covariate: Optional[np.ndarray]
22 |     outcome: np.ndarray
23 |     structural: np.ndarray
24 | 
25 | class TestDataSet(NamedTuple):
26 |     treatment: np.ndarray
27 |     covariate: Optional[np.ndarray]
28 |     structural: np.ndarray
29 |     instrumental: Optional[np.ndarray]
30 |     outcome: Optional[np.ndarray]
31 | 
32 | class TrainDataSetTorch(NamedTuple):
33 |     treatment: torch.Tensor
34 |     instrumental: torch.Tensor
35 |     covariate: torch.Tensor
36 |     outcome: torch.Tensor
37 |     structural: torch.Tensor
38 | 
39 |     @classmethod
40 |     def from_numpy(cls, train_data: TrainDataSet):
41 |         covariate = None
42 |         if train_data.covariate is not None:
43 |             covariate = torch.tensor(train_data.covariate, dtype=torch.float32)
44 |         return TrainDataSetTorch(treatment=torch.tensor(train_data.treatment, dtype=torch.float32),
45 |                                  instrumental=torch.tensor(train_data.instrumental, dtype=torch.float32),
46 |                                  covariate=covariate,
47 |                                  outcome=torch.tensor(train_data.outcome, dtype=torch.float32),
48 |                                  structural=torch.tensor(train_data.structural, dtype=torch.float32))
49 | 
50 |     def to_gpu(self):
51 |         covariate = None
52 |         if self.covariate is not None:
53 |             covariate = self.covariate.cuda()
54 |         return TrainDataSetTorch(treatment=self.treatment.cuda(),
55 |                                  instrumental=self.instrumental.cuda(),
56 |                                  covariate=covariate,
57 |                                  outcome=self.outcome.cuda(),
58 |                                  structural=self.structural.cuda())
59 | 
60 | 
61 | class TestDataSetTorch(NamedTuple):
62 |     treatment: 
torch.Tensor 63 | instrumental: torch.Tensor 64 | covariate: torch.Tensor 65 | outcome: torch.Tensor 66 | structural: torch.Tensor 67 | 68 | @classmethod 69 | def from_numpy(cls, test_data: TestDataSet): 70 | covariate = None 71 | instrumental = None 72 | outcome = None 73 | if test_data.covariate is not None: 74 | covariate = torch.tensor(test_data.covariate, dtype=torch.float32) 75 | if test_data.instrumental is not None: 76 | instrumental = torch.tensor(test_data.instrumental, dtype=torch.float32) 77 | if test_data.outcome is not None: 78 | outcome = torch.tensor(test_data.outcome, dtype=torch.float32) 79 | return TestDataSetTorch(treatment=torch.tensor(test_data.treatment, dtype=torch.float32), 80 | covariate=covariate, 81 | instrumental=instrumental, 82 | outcome=outcome, 83 | structural=torch.tensor(test_data.structural, dtype=torch.float32)) 84 | def to_gpu(self): 85 | covariate = None 86 | instrumental = None 87 | outcome = None 88 | if self.covariate is not None: 89 | covariate = self.covariate.cuda() 90 | if self.instrumental is not None: 91 | instrumental = self.instrumental.cuda() 92 | if self.outcome is not None: 93 | outcome = self.outcome.cuda() 94 | return TestDataSetTorch(treatment=self.treatment.cuda(), 95 | covariate=covariate, 96 | instrumental=instrumental, 97 | outcome=outcome, 98 | structural=self.structural.cuda()) 99 | 100 | ################################# model.py ############################ 101 | class KernelIVModel: 102 | 103 | def __init__(self, X_train: np.ndarray, alpha: np.ndarray, sigma: float): 104 | """ 105 | 106 | Parameters 107 | ---------- 108 | X_train: np.ndarray[n_stage1, dim_treatment] 109 | data for treatment 110 | alpha: np.ndarray[n_stage1*n_stage2 ,dim_outcome] 111 | final weight for prediction 112 | sigma: gauss parameter 113 | """ 114 | self.X_train = X_train 115 | self.alpha = alpha 116 | self.sigma = sigma 117 | 118 | @staticmethod 119 | def cal_gauss(XA, XB, sigma: float = 1): 120 | """ 121 | Returns gaussian kernel matrix 122 | Parameters 123 | ---------- 124 | XA : np.ndarray[n_data1, n_dim] 125 | XB : np.ndarray[n_data2, n_dim] 126 | sigma : float 127 | 128 | Returns 129 | ------- 130 | mat: np.ndarray[n_data1, n_data2] 131 | """ 132 | dist_mat = cdist(XA, XB, "sqeuclidean") 133 | return np.exp(-dist_mat / sigma) 134 | 135 | def predict(self, treatment: np.ndarray, covariate: np.ndarray): 136 | X = np.array(treatment, copy=True) 137 | if covariate is not None: 138 | X = np.concatenate([X, covariate], axis=1) 139 | Kx = self.cal_gauss(X, self.X_train, self.sigma) 140 | return np.dot(Kx, self.alpha) 141 | 142 | def evaluate(self, test_data: TestDataSet): 143 | pred = self.predict(test_data.treatment, test_data.covariate) 144 | return np.mean((test_data.structural - pred)**2) 145 | 146 | ############## trainer.py ############## 147 | def get_median(X) -> float: 148 | dist_mat = cdist(X, X, "sqeuclidean") 149 | res: float = np.median(dist_mat) 150 | return res 151 | 152 | 153 | class KernelIVTrainer: 154 | 155 | def __init__(self, data_list: List, train_params: Dict[str, Any]): 156 | self.data_list = data_list 157 | 158 | self.lambda1 = train_params["lam1"] 159 | self.lambda2 = train_params["lam2"] 160 | self.split_ratio = train_params["split_ratio"] 161 | 162 | def split_train_data(self, train_data: TrainDataSet): 163 | n_data = train_data[0].shape[0] 164 | idx_train_1st, idx_train_2nd = train_test_split(np.arange(n_data), train_size=self.split_ratio) 165 | 166 | def get_data(data, idx): 167 | return data[idx] if data is not None else 
None 168 | 169 | train_1st_data = TrainDataSet(*[get_data(data, idx_train_1st) for data in train_data]) 170 | train_2nd_data = TrainDataSet(*[get_data(data, idx_train_2nd) for data in train_data]) 171 | return train_1st_data, train_2nd_data 172 | 173 | def train(self, rand_seed: int = 42, verbose: int = 0) -> float: 174 | """ 175 | 176 | Parameters 177 | ---------- 178 | rand_seed: int 179 | random seed 180 | verbose : int 181 | Determine the level of logging 182 | Returns 183 | ------- 184 | oos_result : float 185 | The performance of model evaluated by oos 186 | """ 187 | train_data = self.data_list[0] 188 | test_data = self.data_list[2] 189 | train_1st_data, train_2nd_data = self.split_train_data(train_data) 190 | 191 | # get stage1 data 192 | X1 = train_1st_data.treatment 193 | if train_1st_data.covariate is not None: 194 | X1 = np.concatenate([X1, train_1st_data.covariate], axis=-1) 195 | Z1 = train_1st_data.instrumental 196 | Y1 = train_1st_data.outcome 197 | N = X1.shape[0] 198 | 199 | # get stage2 data 200 | X2 = train_2nd_data.treatment 201 | if train_2nd_data.covariate is not None: 202 | X2 = np.concatenate([X2, train_2nd_data.covariate], axis=-1) 203 | Z2 = train_2nd_data.instrumental 204 | Y2 = train_2nd_data.outcome 205 | M = X2.shape[0] 206 | 207 | if verbose > 0: 208 | print("start stage1") 209 | 210 | sigmaX = get_median(X1) 211 | sigmaZ = get_median(Z1) 212 | KX1X1 = KernelIVModel.cal_gauss(X1, X1, sigmaX) 213 | KZ1Z1 = KernelIVModel.cal_gauss(Z1, Z1, sigmaZ) 214 | KZ1Z2 = KernelIVModel.cal_gauss(Z1, Z2, sigmaZ) 215 | KX1X2 = KernelIVModel.cal_gauss(X1, X2, sigmaX) 216 | 217 | if isinstance(self.lambda1, list): 218 | self.lambda1 = 10 ** np.linspace(self.lambda1[0], self.lambda1[1], 50) 219 | gamma = self.stage1_tuning(KX1X1, KX1X2, KZ1Z1, KZ1Z2) 220 | else: 221 | gamma = np.linalg.solve(KZ1Z1 + N * self.lambda1 * np.eye(N), KZ1Z2) 222 | W = KX1X1.dot(gamma) 223 | if verbose > 0: 224 | print("end stage1") 225 | print("start stage2") 226 | 227 | if isinstance(self.lambda2, list): 228 | self.lambda2 = 10 ** np.linspace(self.lambda2[0], self.lambda2[1], 50) 229 | alpha = self.stage2_tuning(W, KX1X1, Y1, Y2) 230 | else: 231 | alpha = np.linalg.solve(W.dot(W.T) + M * self.lambda2 * KX1X1, W.dot(Y2)) 232 | 233 | if verbose > 0: 234 | print("end stage2") 235 | 236 | mdl = KernelIVModel(X1, alpha, sigmaX) 237 | train_loss = mdl.evaluate(train_data) 238 | 239 | test_loss = mdl.evaluate(test_data) 240 | if verbose > 0: 241 | print(f"test_loss:{test_loss}") 242 | 243 | return train_loss, test_loss, mdl 244 | 245 | def stage1_tuning(self, KX1X1, KX1X2, KZ1Z1, KZ1Z2): 246 | N = KX1X1.shape[0] 247 | gamma_list = [np.linalg.solve(KZ1Z1 + N * lam1 * np.eye(N), KZ1Z2) for lam1 in self.lambda1] 248 | score = [np.trace(gamma.T.dot(KX1X1.dot(gamma)) - 2 * KX1X2.T.dot(gamma)) for gamma in gamma_list] 249 | self.lambda1 = self.lambda1[np.argmin(score)] 250 | return gamma_list[np.argmin(score)] 251 | 252 | def stage2_tuning(self, W, KX1X1, Y1, Y2): 253 | M = W.shape[1] 254 | b = W.dot(Y2) 255 | A = W.dot(W.T) 256 | alpha_list = [np.linalg.solve(A + M * lam2 * KX1X1, b) for lam2 in self.lambda2] 257 | score = [np.linalg.norm(Y1 - KX1X1.dot(alpha)) for alpha in alpha_list] 258 | self.lambda2 = self.lambda2[np.argmin(score)] 259 | return alpha_list[np.argmin(score)] 260 | 261 | class KernelIV(object): 262 | def __init__(self) -> None: 263 | self.config = { 264 | 'methodName': 'KernelIV', 265 | 'num': -1, 266 | 'lam1': [-2, -10], 267 | 'lam2': [-2, -10], 268 | 'split_ratio': 0.5, 269 | 'verbose': 1, 
270 |             'seed': 2022,
271 |         }
272 | 
273 |     def set_Configuration(self, config):
274 |         self.config = config
275 | 
276 |     def fit(self, data, exp=-1, config=None):
277 |         if config is None:
278 |             config = self.config
279 | 
280 |         set_seed(config['seed'])
281 |         data.numpy()
282 | 
283 |         num = config['num']
284 |         num = num if num > 0 else data.train.length
285 | 
286 |         train_config = {'lam1': config['lam1'],
287 |                         'lam2': config['lam2'],
288 |                         'split_ratio': config['split_ratio']}
289 |         verbose = config['verbose']
290 | 
291 |         train_data = TrainDataSet(treatment=data.train.t[:num],
292 |                                   instrumental=cat([data.train.z, data.train.x])[:num],
293 |                                   covariate=data.train.x[:num],
294 |                                   outcome=data.train.y[:num],
295 |                                   structural=data.train.g[:num])
296 |         val_data = TrainDataSet(treatment=data.valid.t,
297 |                                 instrumental=cat([data.valid.z, data.valid.x]),
298 |                                 covariate=data.valid.x,
299 |                                 outcome=data.valid.y,
300 |                                 structural=data.valid.g)
301 |         test_data = TestDataSet(treatment=data.test.t,
302 |                                 instrumental=cat([data.test.z, data.test.x]),
303 |                                 covariate=data.test.x,
304 |                                 outcome=data.test.y,
305 |                                 structural=data.test.g)
306 |         data_list = [train_data, val_data, test_data]
307 | 
308 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
309 | 
310 |         trainer = KernelIVTrainer(data_list, train_config)
311 |         train_loss, test_loss, mdl = trainer.train(rand_seed=42, verbose=verbose)
312 | 
313 |         print('End. ' + '-'*20)
314 | 
315 |         def estimation(data):
316 |             return mdl.predict(data.t-data.t, data.x), mdl.predict(data.t, data.x)
317 | 
318 |         self.data = data
319 |         self.mdl = mdl
320 |         self.estimation = estimation
321 | 
322 |     def predict(self, data=None, t=None, x=None):
323 |         if data is None:
324 |             data = self.data.test
325 | 
326 |         if x is None:
327 |             x = data.x
328 | 
329 |         if t is None:
330 |             t = data.t
331 | 
332 |         return self.mdl.predict(t,x)
333 | 
334 |     def ITE(self, data=None, t=None, x=None):
335 |         if data is None:
336 |             data = self.data.test
337 | 
338 |         if x is None:
339 |             x = data.x
340 | 
341 |         if t is None:
342 |             t = data.t
343 | 
344 |         ITE_0 = self.mdl.predict(t-t,x)
345 |         ITE_1 = self.mdl.predict(t-t+1,x)
346 |         ITE_t = self.mdl.predict(t,x)
347 | 
348 |         return ITE_0,ITE_1,ITE_t
349 | 
350 |     def ATE(self, data=None, t=None, x=None):
351 |         ITE_0,ITE_1,ITE_t = self.ITE(data,t,x)
352 | 
353 |         return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0)
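Before moving on to the two-stage-least-squares baselines, the closed-form core of `KernelIVTrainer.train` can be restated in a few lines. A minimal self-contained sketch; the toy arrays and the fixed lam1/lam2 are illustrative assumptions, while the kernel, the median-heuristic bandwidths, and the two np.linalg.solve calls mirror cal_gauss, get_median, and the stage-1/stage-2 formulas above:

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(0)
Z1 = rng.normal(size=(50, 1)); Z2 = rng.normal(size=(50, 1))      # instrument, split in two halves
X1 = Z1 + 0.1 * rng.normal(size=(50, 1))                          # stage-1 treatments
X2 = Z2 + 0.1 * rng.normal(size=(50, 1))                          # stage-2 treatments
Y2 = 2.0 * X2 + rng.normal(size=(50, 1))                          # stage-2 outcomes
lam1 = lam2 = 1e-3

gauss = lambda A, B, s: np.exp(-cdist(A, B, "sqeuclidean") / s)   # same kernel as cal_gauss
sX = np.median(cdist(X1, X1, "sqeuclidean"))                      # median heuristic, as in get_median
sZ = np.median(cdist(Z1, Z1, "sqeuclidean"))
N, M = X1.shape[0], X2.shape[0]

# stage 1: kernel ridge regression of the treatment features on the instrument
gamma = np.linalg.solve(gauss(Z1, Z1, sZ) + N * lam1 * np.eye(N), gauss(Z1, Z2, sZ))
W = gauss(X1, X1, sX) @ gamma
# stage 2: ridge regression of the held-out outcomes on the predicted features
alpha = np.linalg.solve(W @ W.T + M * lam2 * gauss(X1, X1, sX), W @ Y2)
# prediction at new treatments x is k(x, X1) @ alpha
y_hat = gauss(X2, X1, sX) @ alpha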
--------------------------------------------------------------------------------
/mliv/inference/twosls/__init__.py:
--------------------------------------------------------------------------------
1 | from .vanilla2sls_v1 import Vanilla2SLS
2 | from .poly2sls_v1 import Poly2SLS
3 | from .nn2sls_v1 import NN2SLS
--------------------------------------------------------------------------------
/mliv/inference/twosls/nn2sls_v1.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from mliv.utils import set_seed, cat
4 | from torch.utils.data import DataLoader
5 | import numpy as np
6 | 
7 | example = '''
8 | from mliv.inference import NN2SLS
9 | 
10 | model = NN2SLS()
11 | model.fit(data)
12 | ITE = model.predict(data.train)
13 | ATE,_ = model.ATE(data.train)
14 | '''
15 | 
16 | class NN2SLS(object):
17 |     def __init__(self) -> None:
18 |         self.config = {
19 |             'methodName': 'NN2SLS',
20 |             'device': 'cuda:0',
21 |             'instrumental_weight_decay': 0.0,
22 |             'covariate_weight_decay': 0.0,
23 |             'learning_rate': 0.005,
24 |             'verbose':1,
25 |             'show_per_epoch':5,
26 |             'lam2':0.1,
27 |             'epochs':100,
28 |             'batch_size':1000,
29 |             'seed': 2022
30 |         }
31 | 
32 |     def set_Configuration(self, config):
33 |         self.config = config
34 | 
35 |     def fit(self, data, exp=-1, config=None):
36 |         if config is None:
37 |             config = self.config
38 | 
39 |         self.z_dim = data.train.z.shape[1]
40 |         self.x_dim = data.train.x.shape[1]
41 |         self.t_dim = data.train.t.shape[1]
42 | 
43 |         self.device = config['device']
44 |         self.instrumental_weight_decay = config['instrumental_weight_decay']
45 |         self.covariate_weight_decay = config['covariate_weight_decay']
46 |         self.learning_rate = config['learning_rate']
47 | 
48 |         self.verbose = config['verbose']
49 |         self.show_per_epoch = config['show_per_epoch']
50 |         self.lam2 = config['lam2']
51 |         self.epochs = config['epochs']
52 |         self.batch_size = config['batch_size']
53 | 
54 |         self.build_net()
55 | 
56 |         set_seed(config['seed'])
57 |         data.tensor()
58 |         data.to(self.device)
59 |         self.data = data
60 | 
61 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
62 | 
63 |         self.train()
64 | 
65 |         print('End. ' + '-'*20)
66 | 
67 |     def build_net(self):
68 |         self.instrumental_net = nn.Sequential(nn.Linear(self.z_dim+self.x_dim, 1280),
69 |                                               nn.ReLU(),
70 |                                               nn.Linear(1280, 320),
71 |                                               nn.BatchNorm1d(320),
72 |                                               nn.ReLU(),
73 |                                               nn.Linear(320, 32),
74 |                                               nn.ReLU(),
75 |                                               nn.Linear(32, 1))
76 | 
77 |         self.covariate_net = nn.Sequential(nn.Linear(self.x_dim+self.t_dim, 1280),
78 |                                            nn.ReLU(),
79 |                                            nn.Linear(1280, 320),
80 |                                            nn.BatchNorm1d(320),
81 |                                            nn.ReLU(),
82 |                                            nn.Linear(320, 32),
83 |                                            nn.ReLU(),
84 |                                            nn.Linear(32, 1))
85 | 
86 |         self.instrumental_net.to(self.device)
87 |         self.covariate_net.to(self.device)
88 | 
89 |         self.instrumental_opt = torch.optim.Adam(self.instrumental_net.parameters(),lr=self.learning_rate,weight_decay=self.instrumental_weight_decay)
90 |         self.covariate_opt = torch.optim.Adam(self.covariate_net.parameters(),lr=self.learning_rate,weight_decay=self.covariate_weight_decay)
91 | 
92 |         self.loss_fn4t = torch.nn.MSELoss()
93 |         self.loss_fn4y = torch.nn.MSELoss()
94 | 
95 |     def train(self, verbose=None, show_per_epoch=None):
96 |         if verbose is None or show_per_epoch is None:
97 |             verbose, show_per_epoch = self.verbose, self.show_per_epoch
98 | 
99 |         self.lam2 *= self.data.train.length
100 | 
101 |         for exp in range(self.epochs):
102 |             self.instrumental_update(self.data.train, verbose)
103 | 
104 |             if verbose >= 1 and (exp % show_per_epoch == 0 or exp == self.epochs - 1):
105 |                 # log the first-stage fit on the train and valid splits
106 |                 train_t_hat = self.instrumental_net(cat([self.data.train.x,self.data.train.z])).detach()
107 |                 valid_t_hat = self.instrumental_net(cat([self.data.valid.x,self.data.valid.z])).detach()
108 | 
109 |                 loss_train = self.loss_fn4t(train_t_hat, self.data.train.t)
110 |                 loss_valid = self.loss_fn4t(valid_t_hat, self.data.valid.t)
111 | 
112 |                 print("Epoch {} ended: train - {:.4f}, valid - {:.4f}.".format(exp, loss_train, loss_valid))
113 | 
114 | 
115 |         for exp in range(self.epochs):
116 |             self.covariate_update(self.data.train, verbose)
117 | 
118 |             if verbose >= 1 and (exp % show_per_epoch == 0 or exp == self.epochs - 1):
119 |                 eval_train = self.evaluate(self.data.train)
120 |                 eval_valid = self.evaluate(self.data.valid)
121 |                 eval_test = self.evaluate(self.data.test)
122 | 
123 |                 print(f"Epoch {exp} ended:")
124 |                 print(f"Train: {eval_train}. ")
125 |                 print(f"Valid: {eval_valid}. ")
126 |                 print(f"Test : {eval_test}. 
") 127 | 128 | def get_loader(self, data=None): 129 | if data is None: 130 | data = self.train 131 | loader = DataLoader(data, batch_size=self.batch_size) 132 | return loader 133 | 134 | def instrumental_update(self, data, verbose): 135 | loader = self.get_loader(data) 136 | self.instrumental_net.train(True) 137 | 138 | for idx, inputs in enumerate(loader): 139 | x = inputs['x'].to(self.device) 140 | t = inputs['t'].to(self.device) 141 | z = inputs['z'].to(self.device) 142 | 143 | t_hat = self.instrumental_net(cat([x,z])) 144 | 145 | loss = self.loss_fn4t(t_hat, t) 146 | 147 | self.instrumental_opt.zero_grad() 148 | loss.backward() 149 | self.instrumental_opt.step() 150 | 151 | if verbose >= 2: 152 | print('Batch {} - loss: {:.4f}'.format(idx, loss)) 153 | 154 | self.instrumental_net.train(False) 155 | 156 | def covariate_update(self, data, verbose): 157 | loader = self.get_loader(data) 158 | self.covariate_net.train(True) 159 | 160 | for idx, inputs in enumerate(loader): 161 | x = inputs['x'].to(self.device) 162 | z = inputs['z'].to(self.device) 163 | y = inputs['y'].to(self.device) 164 | 165 | t_hat = self.instrumental_net(cat([x,z])) 166 | y_hat = self.covariate_net(cat([x,t_hat])) 167 | 168 | loss = self.loss_fn4y(y_hat, y) 169 | 170 | self.covariate_opt.zero_grad() 171 | loss.backward() 172 | self.covariate_opt.step() 173 | 174 | if verbose >= 2: 175 | print('Batch {} - loss: {:.4f}'.format(idx, loss)) 176 | 177 | self.covariate_net.train(False) 178 | 179 | def predict(self, data=None, t=None, x=None): 180 | if data is None: 181 | data = self.data.test 182 | 183 | if x is None: 184 | x = data.x 185 | 186 | if t is None: 187 | t = data.t 188 | 189 | return self.covariate_net(cat([x,t])).detach().cpu().numpy() 190 | 191 | def ITE(self, data=None, t=None, x=None): 192 | if data is None: 193 | data = self.data.test 194 | 195 | if x is None: 196 | x = data.x 197 | 198 | if t is None: 199 | t = data.t 200 | 201 | ITE_0 = self.covariate_net(cat([x,t-t])).detach().cpu().numpy() 202 | ITE_1 = self.covariate_net(cat([x,t-t+1])).detach().cpu().numpy() 203 | ITE_t = self.covariate_net(cat([x,t])).detach().cpu().numpy() 204 | 205 | return ITE_0,ITE_1,ITE_t 206 | 207 | def ATE(self, data=None, t=None, x=None): 208 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 209 | 210 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 211 | 212 | def estimation(self, data): 213 | self.covariate_net.train(False) 214 | 215 | y0_hat = self.covariate_net(cat([data.x,data.t-data.t])) 216 | yt_hat = self.covariate_net(cat([data.x,data.t])) 217 | 218 | return y0_hat, yt_hat 219 | 220 | def evaluate(self, data): 221 | y0_hat, yt_hat = self.estimation(data) 222 | 223 | loss_y = self.loss_fn4y(yt_hat, data.y) 224 | 225 | eval_str = 'loss_y: {:.4f}'.format(loss_y) 226 | return eval_str -------------------------------------------------------------------------------- /mliv/inference/twosls/poly2sls_v1.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import GridSearchCV 2 | from sklearn.linear_model import Ridge 3 | from sklearn.preprocessing import PolynomialFeatures 4 | from sklearn.pipeline import Pipeline 5 | from sklearn.linear_model import LinearRegression 6 | import numpy as np 7 | from mliv.utils import set_seed 8 | 9 | example = ''' 10 | from mliv.inference import Poly2SLS 11 | 12 | model = Poly2SLS() 13 | model.fit(data) 14 | ITE = model.predict(data.train) 15 | ATE,_ = model.ATE(data.train) 16 | ''' 17 | 18 | class Poly2SLS(object): 19 | def 
__init__(self) -> None: 20 | self.config = { 21 | 'methodName': 'Poly2SLS', 22 | 'seed': 2022 23 | } 24 | 25 | def set_Configuration(self, config): 26 | self.config = config 27 | 28 | def fit(self, data, exp=-1, config=None): 29 | if config is None: 30 | config = self.config 31 | 32 | set_seed(config['seed']) 33 | data.numpy() 34 | 35 | print('Run {}-th experiment for {}. '.format(exp, config['methodName'])) 36 | 37 | params = dict(poly__degree=range(1, 4), ridge__alpha=np.logspace(-5, 5, 11)) 38 | pipe = Pipeline([('poly', PolynomialFeatures()), ('ridge', Ridge())]) 39 | stage_1 = GridSearchCV(pipe, param_grid=params, cv=5) 40 | stage_1.fit(np.concatenate([data.train.z, 1-data.train.z, data.train.x], axis=1), data.train.t) 41 | t_hat = stage_1.predict(np.concatenate([data.train.z, 1-data.train.z, data.train.x], axis=1)) 42 | 43 | pipe2 = Pipeline([('poly', PolynomialFeatures()), ('ridge', Ridge())]) 44 | stage_2 = GridSearchCV(pipe2, param_grid=params, cv=5) 45 | stage_2.fit(np.concatenate([t_hat, data.train.x], axis=1), data.train.y) 46 | 47 | self.data = data 48 | self.stage_1 = stage_1 49 | self.stage_2 = stage_2 50 | 51 | print('End. ' + '-'*20) 52 | 53 | def estimation(data): 54 | return stage_2.predict(np.concatenate([data.t-data.t, data.x], axis=1)), stage_2.predict(np.concatenate([data.t, data.x], axis=1)) 55 | 56 | self.estimation = estimation 57 | 58 | def predict(self, data=None, t=None, x=None): 59 | if data is None: 60 | data = self.data.test 61 | 62 | if x is None: 63 | x = data.x 64 | 65 | if t is None: 66 | t = data.t 67 | 68 | return self.stage_2.predict(np.concatenate([t, x], axis=1)) 69 | 70 | def ITE(self, data=None, t=None, x=None): 71 | if data is None: 72 | data = self.data.test 73 | 74 | if x is None: 75 | x = data.x 76 | 77 | if t is None: 78 | t = data.t 79 | 80 | ITE_0 = self.stage_2.predict(np.concatenate([t-t, x], axis=1)) 81 | ITE_1 = self.stage_2.predict(np.concatenate([t-t+1, x], axis=1)) 82 | ITE_t = self.stage_2.predict(np.concatenate([t, x], axis=1)) 83 | 84 | return ITE_0,ITE_1,ITE_t 85 | 86 | def ATE(self, data=None, t=None, x=None): 87 | ITE_0,ITE_1,ITE_t = self.ITE(data,t,x) 88 | 89 | return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0) 90 | -------------------------------------------------------------------------------- /mliv/inference/twosls/vanilla2sls_v1.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LinearRegression 2 | import numpy as np 3 | from mliv.utils import set_seed 4 | 5 | example = ''' 6 | from mliv.inference import Vanilla2SLS 7 | 8 | model = Vanilla2SLS() 9 | model.fit(data) 10 | ITE = model.predict(data.train) 11 | ATE,_ = model.ATE(data.train) 12 | ''' 13 | 14 | class Vanilla2SLS(object): 15 | def __init__(self) -> None: 16 | self.config = { 17 | 'methodName': 'Vanilla2SLS', 18 | 'seed': 2022 19 | } 20 | 21 | def set_Configuration(self, config): 22 | self.config = config 23 | 24 | def fit(self, data, exp=-1, config=None): 25 | if config is None: 26 | config = self.config 27 | 28 | set_seed(config['seed']) 29 | data.numpy() 30 | 31 | print('Run {}-th experiment for {}. 
/mliv/inference/twosls/vanilla2sls_v1.py:
--------------------------------------------------------------------------------
1 | from sklearn.linear_model import LinearRegression
2 | import numpy as np
3 | from mliv.utils import set_seed
4 | 
5 | example = '''
6 | from mliv.inference import Vanilla2SLS
7 | 
8 | model = Vanilla2SLS()
9 | model.fit(data)
10 | y_hat = model.predict(data.train)
11 | ATE,_ = model.ATE(data.train)
12 | '''
13 | 
14 | class Vanilla2SLS(object):
15 |     def __init__(self) -> None:
16 |         self.config = {
17 |             'methodName': 'Vanilla2SLS',
18 |             'seed': 2022
19 |         }
20 | 
21 |     def set_Configuration(self, config):
22 |         self.config = config
23 | 
24 |     def fit(self, data, exp=-1, config=None):
25 |         if config is None:
26 |             config = self.config
27 | 
28 |         set_seed(config['seed'])
29 |         data.numpy()
30 | 
31 |         print('Run {}-th experiment for {}. '.format(exp, config['methodName']))
32 | 
33 |         stage_1 = LinearRegression()  # stage 1: regress t on (z, x)
34 |         stage_1.fit(np.concatenate([data.train.z, data.train.x], axis=1), data.train.t)
35 |         t_hat = stage_1.predict(np.concatenate([data.train.z, data.train.x], axis=1))
36 | 
37 |         stage_2 = LinearRegression()  # stage 2: regress y on (t_hat, x)
38 |         stage_2.fit(np.concatenate([t_hat, data.train.x], axis=1), data.train.y)
39 | 
40 |         self.data = data
41 |         self.stage_1 = stage_1
42 |         self.stage_2 = stage_2
43 | 
44 |         print('End. ' + '-'*20)
45 | 
46 |         def estimation(data):  # returns (y_hat at t=0, y_hat at the observed t)
47 |             return stage_2.predict(np.concatenate([data.t-data.t, data.x], axis=1)), stage_2.predict(np.concatenate([data.t, data.x], axis=1))
48 | 
49 |         self.estimation = estimation
50 | 
51 |     def predict(self, data=None, t=None, x=None):
52 |         if data is None:
53 |             data = self.data.test
54 | 
55 |         if x is None:
56 |             x = data.x
57 | 
58 |         if t is None:
59 |             t = data.t
60 | 
61 |         return self.stage_2.predict(np.concatenate([t, x], axis=1))
62 | 
63 |     def ITE(self, data=None, t=None, x=None):
64 |         if data is None:
65 |             data = self.data.test
66 | 
67 |         if x is None:
68 |             x = data.x
69 | 
70 |         if t is None:
71 |             t = data.t
72 | 
73 |         ITE_0 = self.stage_2.predict(np.concatenate([t-t, x], axis=1))
74 |         ITE_1 = self.stage_2.predict(np.concatenate([t-t+1, x], axis=1))
75 |         ITE_t = self.stage_2.predict(np.concatenate([t, x], axis=1))
76 | 
77 |         return ITE_0, ITE_1, ITE_t
78 | 
79 |     def ATE(self, data=None, t=None, x=None):
80 |         ITE_0, ITE_1, ITE_t = self.ITE(data, t, x)
81 | 
82 |         return np.mean(ITE_1-ITE_0), np.mean(ITE_t-ITE_0)
--------------------------------------------------------------------------------
/mliv/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .loaddata import CausalDataset, cat, split
2 | from .setenv import set_cuda, set_seed, set_tf_seed, get_device
--------------------------------------------------------------------------------
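The loader that follows reads train.csv / valid.csv / test.csv and groups columns by their first character ('x0', 'x1' become data.x, 't0' becomes data.t, and so on). A sketch of a conforming file (illustrative values; the path is hypothetical):

import os
import numpy as np
import pandas as pd

os.makedirs('./Data/toy', exist_ok=True)   # hypothetical location
n = 100
df = pd.DataFrame({
    'z0': np.random.randn(n),  # instrument (aliased to data.i when no 'i' columns exist)
    'x0': np.random.randn(n),  # covariates
    'x1': np.random.randn(n),
    't0': np.random.randn(n),  # treatment
    'y0': np.random.randn(n),  # outcome
})
df.to_csv('./Data/toy/train.csv', index=False)  # CausalDataset also expects valid.csv and test.csv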
/mliv/utils/loaddata.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import copy
3 | import numpy as np
4 | try:
5 |     import torch
6 |     from torch.utils.data import Dataset
7 | except ImportError:
8 |     print('No module named torch. Please install it with `pip install torch`.')
9 | 
10 | def get_var_df(df, var):
11 |     var_cols = [c for c in df.columns if c.startswith(var)]
12 |     return df[var_cols].to_numpy()
13 | 
14 | def cat(data_list, axis=1):
15 |     try:
16 |         output = torch.cat(data_list, axis)
17 |     except:
18 |         output = np.concatenate(data_list, axis)
19 | 
20 |     return output
21 | 
22 | def split(data, split_ratio=0.5):
23 |     data1 = copy.deepcopy(data)
24 |     data2 = copy.deepcopy(data)
25 | 
26 |     split_num = int(data.length * split_ratio)
27 |     data1.split(0, split_num)
28 |     data2.split(split_num, data.length)
29 | 
30 |     return data1, data2
31 | 
32 | class CausalDataset(object):
33 |     def __init__(self, path):
34 |         self.path = path
35 |         self.train = getDataset(pd.read_csv(path + 'train.csv'))
36 |         self.valid = getDataset(pd.read_csv(path + 'valid.csv'))
37 |         self.test = getDataset(pd.read_csv(path + 'test.csv'))
38 | 
39 |     def split(self, split_ratio=0.5, data=None):
40 |         if data is None:
41 |             data = self.train
42 | 
43 |         data1, data2 = split(data, split_ratio)
44 |         self.data1 = data1
45 |         self.data2 = data2
46 | 
47 |     def get_train(self):
48 |         return self.train
49 | 
50 |     def get_valid(self):
51 |         return self.valid
52 | 
53 |     def get_test(self):
54 |         return self.test
55 | 
56 |     def get_data(self):
57 |         return self.train, self.valid, self.test
58 | 
59 |     def tensor(self):
60 |         self.train.tensor()
61 |         self.valid.tensor()
62 |         self.test.tensor()
63 | 
64 |     def double(self):
65 |         self.train.double()
66 |         self.valid.double()
67 |         self.test.double()
68 | 
69 |     def float(self):
70 |         self.train.float()
71 |         self.valid.float()
72 |         self.test.float()
73 | 
74 |     def detach(self):
75 |         self.train.detach()
76 |         self.valid.detach()
77 |         self.test.detach()
78 | 
79 |     def to(self, device='cpu'):
80 |         self.train.to(device)
81 |         self.valid.to(device)
82 |         self.test.to(device)
83 | 
84 |     def cpu(self):
85 |         self.train.cpu()
86 |         self.valid.cpu()
87 |         self.test.cpu()
88 | 
89 |     def numpy(self):
90 |         self.train.numpy()
91 |         self.valid.numpy()
92 |         self.test.numpy()
93 | 
94 | class TorchDataset(Dataset):
95 |     def __init__(self, data, device='cpu', type='tensor'):
96 |         if type == 'tensor':
97 |             data.tensor()
98 |         else:
99 |             data.double()
100 |         data.to(device)
101 | 
102 |         self.data = data
103 | 
104 |     def __getitem__(self, idx):
105 |         var_dict = {}
106 |         for var in self.data.Vars:
107 |             exec(f'var_dict[\'{var}\']=self.data.{var}[idx]')  # the variables live on the wrapped dataset, not on this class
108 | 
109 |         return var_dict
110 | 
111 |     def __len__(self):
112 |         return self.data.length
113 | 
114 | class getDataset(Dataset):
115 |     def __init__(self, df):
116 |         self.length = len(df)
117 |         self.Vars = list(set([col[0] for col in df.columns]))  # variable name = first character of the column name ('x0','x1' -> 'x')
118 | 
119 |         for var in self.Vars:
120 |             exec(f'self.{var}=get_var_df(df, \'{var}\')')
121 | 
122 |         if not hasattr(self, 'i'):
123 |             self.i = self.z  # fall back to the instrument columns when no 'i' columns exist
124 |             self.Vars.append('i')
125 | 
126 |     def split(self, start, end):
127 |         for var in self.Vars:
128 |             try:
129 |                 exec(f'self.{var} = self.{var}[start:end]')
130 |             except:
131 |                 pass
132 | 
133 |         self.length = end - start
134 | 
135 |     def cpu(self):  # the converters below stop at the first attribute that cannot be converted
136 |         for var in self.Vars:
137 |             try:
138 |                 exec(f'self.{var} = self.{var}.cpu()')
139 |             except:
140 |                 break
141 | 
142 |     def cuda(self, n=0):
143 |         for var in self.Vars:
144 |             try:
145 |                 exec(f'self.{var} = self.{var}.cuda({n})')
146 |             except:
147 |                 break
148 | 
149 |     def to(self, device='cpu'):
150 |         for var in self.Vars:
151 |             try:
152 |                 exec(f'self.{var} = self.{var}.to(\'{device}\')')
153 |             except:
154 |                 break
155 | 
156 |     def tensor(self):
157 |         for var in self.Vars:
158 |             try:
159 |                 exec(f'self.{var} = torch.Tensor(self.{var})')
160 |             except:
161 |                 break
162 | 
163 |     def float(self):
164 |         for var in self.Vars:
165 |             try:
166 |                 exec(f'self.{var} = torch.Tensor(self.{var}).float()')
167 |             except:
168 |                 break
169 | 
170 |     def double(self):
171 |         for var in self.Vars:
172 |             try:
173 |                 exec(f'self.{var} = torch.Tensor(self.{var}).double()')
174 |             except:
175 |                 break
176 | 
177 |     def detach(self):
178 |         for var in self.Vars:
179 |             try:
180 |                 exec(f'self.{var} = self.{var}.detach()')
181 |             except:
182 |                 break
183 | 
184 |     def numpy(self):
185 |         try:
186 |             self.detach()
187 |         except:
188 |             pass
189 | 
190 |         try:
191 |             self.cpu()
192 |         except:
193 |             pass
194 | 
195 |         for var in self.Vars:
196 |             try:
197 |                 exec(f'self.{var} = self.{var}.numpy()')
198 |             except:
199 |                 break
200 | 
201 |     def pandas(self, path=None):  # flatten all variables back into a single DataFrame ('x' -> 'x0','x1',...)
202 |         var_list = []
203 |         var_dims = []
204 |         var_name = []
205 |         for var in self.Vars:
206 |             exec(f'var_list.append(self.{var})')
207 |             exec(f'var_dims.append(self.{var}.shape[1])')
208 |         for i in range(len(self.Vars)):
209 |             for d in range(var_dims[i]):
210 |                 var_name.append(self.Vars[i] + str(d))
211 |         df = pd.DataFrame(np.concatenate(var_list, axis=1), columns=var_name)
212 | 
213 |         if path is not None:
214 |             df.to_csv(path, index=False)
215 |         return df
216 | 
217 |     def __getitem__(self, idx):
218 |         var_dict = {}
219 |         for var in self.Vars:
220 |             exec(f'var_dict[\'{var}\']=self.{var}[idx]')
221 | 
222 |         return var_dict
223 | 
224 |     def __len__(self):
225 |         return self.length
--------------------------------------------------------------------------------
/mliv/utils/setenv.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import argparse
4 | import os
5 | from numba import cuda
6 | 
7 | try:
8 |     import torch
9 | except ImportError:
10 |     pass
11 | try:
12 |     import tensorflow as tf
13 | except ImportError:
14 |     pass
15 | 
16 | def clear_cache():
17 |     try:
18 |         if torch.cuda.is_available():
19 |             cuda.select_device(0)
20 |             cuda.close()
21 |     except:
22 |         pass
23 | 
24 | def set_cuda(CUDA='3'):
25 |     os.environ["CUDA_VISIBLE_DEVICES"] = CUDA if isinstance(CUDA, str) else str(CUDA)
26 |     os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
27 | 
28 | def set_seed(seed=2021):  # assumes torch is installed; seeds numpy, random, and torch
29 |     np.random.seed(seed)
30 |     random.seed(seed)
31 |     os.environ['PYTHONHASHSEED'] = str(seed)
32 | 
33 |     torch.manual_seed(seed)
34 |     torch.cuda.manual_seed_all(seed)
35 |     torch.cuda.manual_seed(seed)
36 | 
37 |     torch.backends.cudnn.deterministic = True
38 |     torch.backends.cudnn.benchmark = False
39 | 
40 | def set_tf_seed(seed=2021):
41 |     np.random.seed(seed)
42 |     random.seed(seed)
43 |     os.environ['PYTHONHASHSEED'] = str(seed)
44 | 
45 |     tf.compat.v1.reset_default_graph()  # use the compat API so this also works under TF2
46 |     tf.compat.v1.set_random_seed(seed)
47 | 
48 | def get_device(GPU=True):
49 |     device = torch.device('cuda' if torch.cuda.is_available() and GPU else "cpu")
50 |     if torch.cuda.is_available():
51 |         torch.cuda.empty_cache()
52 | 
53 |     return device
--------------------------------------------------------------------------------
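A typical environment-setup sequence with the helpers above (a sketch; set_cuda should run before torch initializes CUDA so the visible-device pin takes effect):

from mliv.utils import set_cuda, set_seed, get_device

set_cuda('0')          # pins CUDA_VISIBLE_DEVICES
set_seed(2022)         # seeds numpy/random/torch and makes cudnn deterministic
device = get_device()  # torch.device('cuda') if available, else 'cpu'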
url="https://github.com/anpwu/mliv.git", # 模块github地址 15 | packages=setuptools.find_packages(), # 自动找到项目中导入的模块 16 | # 模块相关的元数据(更多的描述) 17 | classifiers=[ 18 | "Programming Language :: Python", 19 | "License :: OSI Approved :: Apache Software License", 20 | "Operating System :: OS Independent", 21 | ], 22 | # 依赖模块 23 | install_requires=[ 24 | 'argparse', 25 | 'pillow', 26 | 'numba', 27 | 'cvxopt', 28 | ], 29 | # python版本 30 | python_requires=">=3.7", 31 | ) --------------------------------------------------------------------------------