├── .circleci └── config.yml ├── .github └── CODEOWNERS ├── .gitignore ├── LICENSE ├── MANIFEST.IN ├── Makefile ├── README.md ├── examples └── .gitkeep ├── setup.cfg ├── setup.py ├── src └── pydtr │ ├── __init__.py │ ├── iqlearn │ ├── __init__.py │ ├── base.py │ └── regression.py │ └── version.py └── tests ├── test_iqlearn_sklearn_predict.py └── test_iqlearn_sm_predict.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | codecov: codecov/codecov@1.0.2 4 | jobs: 5 | build-and-test: 6 | docker: 7 | - image: circleci/python:3.6 8 | auth: 9 | username: fullflu 10 | password: $DOCKERHUB_PASSWORD 11 | steps: 12 | - checkout 13 | - restore_cache: 14 | key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }} 15 | - run: 16 | name: Setup testing environment 17 | command: | 18 | python3 -m venv venv 19 | . venv/bin/activate 20 | make dev 21 | - save_cache: 22 | key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }} 23 | paths: 24 | - "venv" 25 | - run: 26 | name: Run Tests 27 | command: | 28 | . venv/bin/activate 29 | make test 30 | - codecov/upload: 31 | file: coverage.xml 32 | deploy: 33 | docker: 34 | - image: circleci/python:3.6 35 | auth: 36 | username: fullflu 37 | password: $DOCKERHUB_PASSWORD 38 | steps: 39 | - checkout 40 | - restore_cache: 41 | key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }} 42 | - run: 43 | name: install python dependencies 44 | command: | 45 | python3 -m venv venv 46 | . venv/bin/activate 47 | make dev 48 | - save_cache: 49 | key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }} 50 | paths: 51 | - "venv" 52 | - run: 53 | name: verify git tag vs. version 54 | command: | 55 | python3 -m venv venv 56 | . 
venv/bin/activate 57 | python setup.py verify 58 | - run: 59 | name: init .pypirc 60 | command: | 61 | echo -e "[pypi]" >> ~/.pypirc 62 | echo -e "username = fullflu" >> ~/.pypirc 63 | echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc 64 | - run: 65 | name: create packages 66 | command: | 67 | make package 68 | - run: 69 | name: upload to pypi 70 | command: | 71 | . venv/bin/activate 72 | twine upload --repository pypi dist/* 73 | workflows: 74 | version: 2 75 | workflow: 76 | jobs: 77 | - build-and-test: 78 | context: docker-hub-creds 79 | filters: 80 | tags: 81 | only: /.*/ 82 | - deploy: 83 | context: 84 | - docker-hub-creds 85 | - pypi-publish 86 | requires: 87 | - build-and-test 88 | filters: 89 | branches: 90 | ignore: /.*/ 91 | tags: 92 | only: /[0-9]+(\.[0-9]+)*/ 93 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @fullflu 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # cache 2 | __pycache__ 3 | .mypy_cache 4 | 5 | # Jupyter Notebook 6 | .ipynb_checkpoints 7 | 8 | # egg 9 | *.egg-info/ 10 | .eggs/ 11 | 12 | # python 13 | .python-version 14 | 15 | # test 16 | .coverage 17 | htmlcov/ 18 | 19 | # env 20 | .env 21 | .envrc 22 | 23 | # build 24 | build/ 25 | dist/ 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, fullflu 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. 
Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.IN: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | dev: 2 | pip install --upgrade pip 3 | pip install -e ./. 
4 | pip install category_encoders 5 | pip install pytest 6 | pip install coverage 7 | pip install twine 8 | 9 | package: 10 | python setup.py sdist 11 | python setup.py bdist_wheel 12 | 13 | test: 14 | coverage run --source=src/pydtr -m pytest 15 | coverage report 16 | coverage xml 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pydtr 2 | 3 | [![CircleCI](https://circleci.com/gh/fullflu/pydtr.svg?style=shield)](https://app.circleci.com/pipelines/github/fullflu/pydtr) 4 | [![codecov](https://codecov.io/gh/fullflu/pydtr/branch/master/graph/badge.svg)](https://codecov.io/gh/fullflu/pydtr) 5 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 6 | 7 | 8 | ## Description 9 | 10 | This is a python library to conduct a dynamic treatment regime ([DTR](https://en.wikipedia.org/wiki/Dynamic_treatment_regime)), `pydtr`. 11 | 12 | A DTR is a paradigm that attempts to select optimal treatments adaptively for individual patients. 13 | 14 | Pydtr enables you to implement DTR methods easily by using sklearn-based interfaces. 15 | 16 | | Method | Single binary treatment | Multiple treatments | Multinomial treatment | Continuous treatment | Modeling flexibility | Interpretability | 17 | | ---- | ---- | ---- | ---- | ---- | ---- | ---- | 18 | | IqLearnReg
(with sklearn) | :white_check_mark: | :white_check_mark: | :white_check_mark:
(with pipeline) | | :white_check_mark:
(with arbitrary regression models) | | 19 | | IqLearnReg
(with statsmodels) | :white_check_mark: | :white_check_mark: | :white_check_mark: | | limited to OLS | :white_check_mark:
(with confidence intervals) | 20 | | GEstimation | WIP | | WIP | WIP | WIP | WIP | 21 | 22 | `IqLearnReg` means a regression method of iterative q-learning. 23 | 24 | When there are categorical independent variables and you use a sklearn model as a regression function, you need to encode the categorical variables before using the model. 25 | 26 | We recommend encoding categorical variables with `category_encoders` and combining the encoders with the sklearn model using `sklearn.pipeline`. 27 | 28 | G-estimation, a well-known DTR method, is not yet available. 29 | 30 | ## Requirements 31 | 32 | - python>=3.6 33 | - pandas>=1.1.2 34 | - scikit-learn>=0.23.2 35 | - numpy>=1.19.2 36 | - statsmodels>=0.12.0 37 | 38 | ## Installation 39 | 40 | ### From pypi 41 | 42 | ``` 43 | pip install pydtr 44 | ``` 45 | 46 | ### From source 47 | 48 | ``` 49 | git clone https://github.com/fullflu/pydtr.git 50 | cd pydtr 51 | python setup.py install 52 | ``` 53 | 54 | ## Usage 55 | 56 | ### Iterative Q Learning (IqLearnReg) 57 | 58 | You need to import libraries and prepare data. 59 | 60 | ```python 61 | # import 62 | import numpy as np 63 | import pandas as pd 64 | from sklearn.ensemble import RandomForestRegressor 65 | 66 | from pydtr.iqlearn.regression import IqLearnReg 67 | 68 | # create sample dataframe 69 | n = 10 70 | thres = int(n / 2) 71 | df = pd.DataFrame() 72 | df["L1"] = np.arange(n) 73 | df["A1"] = [0, 1] * int(n / 2) 74 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2) 75 | df["Y1"] = np.zeros(n) 76 | df["Y2"] = np.zeros(n) 77 | ``` 78 | 79 | You can use sklearn-based models. 
80 | 81 | ```python 82 | # set model info 83 | model_info = [ 84 | { 85 | "model": RandomForestRegressor(), 86 | "action_dict": {"A1": [0, 1]}, 87 | "feature": ["L1", "A1"], 88 | "outcome": "Y1" 89 | }, 90 | { 91 | "model": RandomForestRegressor(), 92 | "action_dict": {"A2": [0, 1]}, 93 | "feature": ["L1", "A1", "Y1", "A2"], 94 | "outcome": "Y2" 95 | } 96 | ] 97 | # fit model 98 | dtr_model = IqLearnReg( 99 | n_stages=2, 100 | model_info=model_info 101 | ) 102 | dtr_model.fit(df) 103 | 104 | # predict optimal actions 105 | opt_action_stage_1 = dtr_model.predict(df, 0) 106 | opt_action_stage_2 = dtr_model.predict(df, 1) 107 | opt_action_all_stages = dtr_model.predict_all_stages(df) 108 | ``` 109 | 110 | You can also use statsmodels-based models. 111 | 112 | ```python 113 | # set model info 114 | model_info = [ 115 | { 116 | "model": "p_outcome ~ L1 * A1", 117 | "action_dict": {"A1": [0, 1]}, 118 | "feature": ["L1", "A1"], 119 | "outcome": "Y1" 120 | }, 121 | { 122 | "model": "p_outcome ~ L1 + A1 + Y1 * A2", 123 | "action_dict": {"A2": [0, 1]}, 124 | "feature": ["L1", "A1", "Y1", "A2"], 125 | "outcome": "Y2" 126 | } 127 | ] 128 | # fit model 129 | dtr_model = IqLearnReg( 130 | n_stages=2, 131 | model_info=model_info 132 | ) 133 | dtr_model.fit(df) 134 | 135 | # predict optimal actions 136 | opt_action_stage_1 = dtr_model.predict(df, 0) 137 | opt_action_stage_2 = dtr_model.predict(df, 1) 138 | opt_action_all_stages = dtr_model.predict_all_stages(df) 139 | ``` 140 | 141 | Please see [examples](https://github.com/fullflu/pydtr/blob/master/examples/) to get more information. 142 | 143 | ## Authors 144 | 145 | - [@fullflu](https://github.com/fullflu) 146 | 147 | ## Contributors 148 | 149 | Please feel free to create issues or to send pull-requests! 150 | 151 | If all checks have passed in pull-requests, I will merge and release them. 
152 | 153 | ## License 154 | 155 | [BSD](https://github.com/fullflu/pydtr/blob/master/LICENSE) 156 | 157 | 158 | ## Structure 159 | 160 | ``` 161 | ├── .circleci 162 | │   ├── config.yml 163 | ├── .github 164 | │   ├── CODEOWNERS 165 | ├── LICENSE 166 | ├── MANIFEST.IN 167 | ├── Makefile 168 | ├── README.md 169 | ├── examples 170 | │   ├── ...several notebooks... 171 | ├── setup.cfg 172 | ├── setup.py 173 | ├── src 174 | │   ├── pydtr 175 | │   │   ├── __init__.py 176 | │   │   └── iqlearn 177 | │   │   ├── __init__.py 178 | │   │   ├── base.py 179 | │   │   └── regression.py 180 | └── tests 181 | ├── test_iqlearn_sklearn_predict.py 182 | └── test_iqlearn_sm_predict.py 183 | ``` 184 | 185 | ## References 186 | 187 | - Chakraborty, B, Moodie, EE. *Statistical Methods for Dynamic Treatment Regimes.* Springer, New York, 2013. 188 | -------------------------------------------------------------------------------- /examples/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fullflu/pydtr/8fbf836b2871ddfe4298a76c1257bfd06b0953ed/examples/.gitkeep -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | 7 | from setuptools import setup 8 | from setuptools import find_packages 9 | from setuptools.command.install import install 10 | from codecs import open 11 | 12 | from src.pydtr.version import __version__ 13 | 14 | here = os.path.abspath(os.path.dirname(__file__)) 15 | 16 | with open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 17 | long_description = f.read() 18 | 19 | 20 | class 
VerifyVersionCommand(install): 21 | """Custom command to verify that the git tag matches our version""" 22 | description = 'verify that the git tag matches our version' 23 | 24 | def run(self): 25 | tag = os.getenv('CIRCLE_TAG') 26 | 27 | if tag != __version__: 28 | info = "Git tag: {0} does not match the version of this app: {1}".format( 29 | tag, __version__ 30 | ) 31 | sys.exit(info) 32 | 33 | 34 | setup( 35 | name='pydtr', 36 | version=__version__, 37 | description='Python library of Dynamic Treatment Regimes', 38 | long_description=long_description, 39 | long_description_content_type='text/markdown', 40 | url='https://github.com/fullflu/pydtr', 41 | author='fullflu', 42 | author_email='k.takayama0902@gmail.com', 43 | license='BSD', 44 | install_requires=[ 45 | 'pandas>=1.1.2', 46 | 'scikit-learn>=0.23.2', 47 | 'numpy>=1.19.2', 48 | 'statsmodels>=0.12.0' 49 | ], 50 | keywords=['dynamic treatment regimes', 'reinforcement learning', 'dtr'], 51 | include_package_data=True, 52 | package_dir={'': "src"}, 53 | packages=find_packages('src'), 54 | classifiers=[ 55 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 56 | 'License :: OSI Approved :: BSD License', 57 | 'Programming Language :: Python :: 3', 58 | 'Programming Language :: Python :: 3.6', 59 | ], 60 | setup_requires=["pytest-runner"], 61 | tests_require=["pytest", "pytest-cov", "coverage", "category_encoders"], 62 | cmdclass={ 63 | 'verify': VerifyVersionCommand, 64 | } 65 | ) 66 | -------------------------------------------------------------------------------- /src/pydtr/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ 2 | -------------------------------------------------------------------------------- /src/pydtr/iqlearn/__init__.py: -------------------------------------------------------------------------------- 
class IqLearnBase(object):
    """
    Base class of iterative q learning

    Stage models are fitted backwards (last stage first). The regression
    target of the last stage is its observed outcome; the target of an earlier
    stage ``t`` is the pseudo-outcome ``outcome_t + max_a Q_{t+1}(X_{t+1}, a)``.

    Subclasses provide the actual fitting through ``_fit_model`` and
    ``_fit_model_all_data``. The class does not use ``ABCMeta``, so the
    ``@abstractmethod`` markers are not enforced at instantiation time.

    Attributes
    ----------
    n_stages : int
        the number of total stages
    model_info : list of dict
        list of model information dictionary of each stage
            - model: string or sklearn-based instance
                model object
            - action_dict: dict
                action variable -> domain (candidate values) of the variable
            - feature: list of str
                feature variable names
            - outcome: str
                an outcome variable name
    n_bs : int
        the number of bootstrap sampling (0 means no bootstrap)
    model_all : dict
        stage -> model fitted on all data (None until fitted)
    models : dict
        stage -> array of bootstrap models (only filled when n_bs > 0)
    """
    def __init__(self, n_stages: int, model_info: list, n_bs: int = 0) -> None:
        """
        Parameters
        ----------
        n_stages : int
            The number of total stages
        model_info : list of dict
            List of model information dictionary of each stage
                - model: string or sklearn-based instance
                    Model object
                - action_dict: dict
                    Action variable -> domain of the variable
                - feature: list of str
                    Feature variable names
                - outcome: str
                    An outcome variable name
        n_bs : int
            The number of bootstrap sampling (0 means no bootstrap)

        Raises
        ------
        AssertionError
            If n_stages differs from the length of model_info
        """
        if n_stages != len(model_info):
            # raised explicitly so that the check is not stripped by ``python -O``;
            # the exception type is kept for backward compatibility
            raise AssertionError("n_stages must be the same as the length of model_info")
        self.n_stages = n_stages
        self.model_info = model_info
        self.n_bs = n_bs
        self._init_set()

    def _get_max_val_df(self, model, X: pd.DataFrame, t: int) -> pd.DataFrame:
        """
        Get maximum value of Q function of stage t (ref: max_{A} Q(X, A))

        Every combination of the candidate action values of stage t is written
        into a copy of X and scored with ``model.predict``; the best combination
        and its value are kept for each row.

        Returns
        -------
        max_val_df: pandas.dataframe
            one column per action variable plus "val" (the maximum q-value),
            indexed 0..n-1 regardless of the index of X
        """
        action_dict = self.model_info[t]["action_dict"]
        keys = list(action_dict)
        base_df = X.reset_index(drop=True)
        # dataframe to store optimal actions and their q values
        max_val_df = pd.DataFrame(columns=keys)
        # initialize maximum value
        max_val_df["val"] = [-1 * np.inf] * X.shape[0]
        for combo in itertools.product(*action_dict.values()):
            # a single action variable is assigned as a scalar,
            # multiple action variables as one value per column
            values = combo[0] if len(combo) == 1 else list(combo)
            # start from the observed features and overwrite the actions
            tmp_df = base_df.copy()
            tmp_df[keys] = values
            # predict q value for the action values (flattened so that
            # Series, 1-d and (n, 1) outputs are handled alike)
            val_a = np.asarray(model.predict(tmp_df)).ravel()
            # update optimal actions and q values where this action is better
            flag = (max_val_df["val"] < val_a).values
            max_val_df.loc[flag, keys] = values
            max_val_df.loc[flag, "val"] = val_a[flag]
        return max_val_df

    def _get_p_outcome(self, model: object, X: pd.DataFrame, y: pd.Series, t: int) -> np.ndarray:
        """
        Get pseudo-outcome of stage t

        Parameters
        ----------
        model: object
            fitted model of stage t + 1
        X: pandas.dataframe
            features of stage t + 1
        y: pandas.series
            observed outcome of stage t
        t: int
            stage

        Returns
        -------
        numpy.ndarray
            y + max_{A} Q_{t+1}(X, A)
        """
        max_val_df = self._get_max_val_df(model, X, t + 1)
        return max_val_df["val"].values + np.asarray(y)

    @staticmethod
    def _sample_bs(df: pd.DataFrame, size_bs: int = -1) -> pd.DataFrame:
        """
        bootstrap sampling (size_bs == -1 means the same size as df)
        """
        if size_bs == -1:
            size_bs = df.shape[0]
        return resample(df, n_samples=size_bs)

    def _get_stage_data(self, df: pd.DataFrame, t: int, next_model: object):
        """
        Extract features and regression target (pseudo-outcome) of stage t
        """
        info = self.model_info[t]
        X = df[info["feature"]]
        y = df[info["outcome"]]
        if t == self.n_stages - 1:
            # last stage: the target is the observed outcome itself
            return X, y.values
        X_next = df[self.model_info[t + 1]["feature"]]
        # earlier stage: own outcome plus the optimal value of the next stage
        return X, self._get_p_outcome(next_model, X_next, y, t)

    def fit(self, df: pd.DataFrame):
        """
        Fit dtr models

        Parameters
        ----------
        df: pandas.dataframe
            input data (each row contains all stage information of each individual)

        Returns
        -------
        self
        """
        # fit models using all data
        for t in reversed(range(self.n_stages)):
            print("Stage: {}".format(t))
            X, p_outcome = self._get_stage_data(df, t, self.model_all.get(t + 1))
            self._fit_model_all_data(X, p_outcome, t)

        # fit models using bootstrap
        for i in range(self.n_bs):
            df_i = self._sample_bs(df)
            for t in reversed(range(self.n_stages)):
                next_model = None
                if t < self.n_stages - 1:
                    # each replicate uses its own next-stage model so that the
                    # whole backward procedure is re-run on the resample
                    next_model = self.models[t + 1][i]
                    if next_model is None:
                        # subclass did not store a bootstrap model
                        next_model = self.model_all[t + 1]
                X, p_outcome = self._get_stage_data(df_i, t, next_model)
                # fit model of stage t
                self._fit_model(X, p_outcome, t, i)
        return self

    @abstractmethod
    def _fit_model(self, X: pd.DataFrame, p_outcome: np.ndarray, t: int, i: int) -> None:
        pass

    @abstractmethod
    def _fit_model_all_data(self, X: pd.DataFrame, p_outcome: np.ndarray, t: int) -> None:
        pass

    def _init_set(self) -> None:
        """
        Initialize the containers of fitted models
        """
        self.model_all = {}
        self.models = {}
        for t in range(self.n_stages):
            # TODO: check model type
            self.model_all[t] = None
            if self.n_bs:
                self.models[t] = np.array([None] * (self.n_bs))

    def get_params(self) -> pd.DataFrame:
        """
        Get estimated parameters of the stages whose model is given as a string

        Returns
        -------
        params: pandas.dataframe
            melted parameters ("variable", "value") with a "stage" column.
            Bootstrap models are used when they exist; otherwise the model
            fitted on all data is used (one row per parameter). Empty when
            nothing is available.
        """
        frames = []
        for t in reversed(range(self.n_stages)):
            if not isinstance(self.model_info[t]["model"], str):
                continue
            # self.models[t] only exists when n_bs > 0
            candidates = self.models[t] if t in self.models else [self.model_all.get(t)]
            rows = [m.params for m in candidates if m is not None]
            if not rows:
                continue
            tmp_df = pd.melt(pd.DataFrame(rows))
            tmp_df["stage"] = t
            frames.append(tmp_df)
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames)

    def predict(self, df: pd.DataFrame, t: int) -> pd.DataFrame:
        """
        Predict optimal treatment of stage t for each row

        Parameters
        ----------
        df: pandas.dataframe
            input data (each row contains all stage information of each individual)
        t: int
            stage

        Returns
        -------
        max_val_df: pandas.dataframe
            optimal treatments and their q-values
        """
        X = df[self.model_info[t]["feature"]].copy()
        return self._get_max_val_df(self.model_all[t], X, t)

    def predict_all_stages(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Predict optimal treatment of all stages for each row

        Parameters
        ----------
        df: pandas.dataframe
            input data (each row contains all stage information of each individual)

        Returns
        -------
        max_df: pandas.dataframe
            optimal treatments, their q-values, stage information ("stage")
            and the row position in df ("row_id")
        """
        frames = []
        for t in range(self.n_stages):
            X = df[self.model_info[t]["feature"]]
            max_val_df = self._get_max_val_df(self.model_all[t], X, t)
            max_val_df["stage"] = t
            max_val_df["row_id"] = np.arange(X.shape[0])
            frames.append(max_val_df)
        if not frames:
            return pd.DataFrame()
        # single concat keeps the stage order and avoids quadratic re-copying
        return pd.concat(frames, sort=True).reset_index(drop=True)
class IqLearnReg(IqLearnBase):
    """
    Iterative q learning with regression models

    The "model" entry of each stage is either a string, used as a
    statsmodels OLS formula whose response is the column "p_outcome",
    or an estimator object exposing ``fit(X, y)``.
    """

    def _fit_stage_model(self, X: pd.DataFrame, p_outcome: np.ndarray, t: int, fresh_copy: bool):
        """
        Fit the model of stage t on (X, p_outcome) and return the fitted model

        Parameters
        ----------
        X: pandas.dataframe
            input data (each row contains feature of each individual)
        p_outcome: np.ndarray
            pseudo outcome of stage t
        t: int
            stage
        fresh_copy: bool
            if True, an estimator object is deep-copied before fitting so the
            result does not share state with any previously fitted model
        """
        model = self.model_info[t]["model"]
        if isinstance(model, str):
            # formula model: the pseudo outcome is exposed as column "p_outcome"
            df = X.copy()
            df["p_outcome"] = p_outcome
            return smf.ols(formula=model, data=df).fit()
        if fresh_copy:
            import copy

            # ``fit`` of an estimator returns the estimator itself; without a
            # copy every bootstrap fit would overwrite the all-data model
            model = copy.deepcopy(model)
        return model.fit(X, p_outcome)

    def _fit_model(self, X: pd.DataFrame, p_outcome: np.ndarray, t: int, i: int) -> None:
        """
        Fit dtr model of stage t and bootstrap i

        Parameters
        ----------
        X: pandas.dataframe
            input data (each row contains feature of each individual)
        p_outcome: np.ndarray
            pseudo outcome of stage t
        t: int
            stage
        i: int
            bootstrap index

        Returns
        -------
        None
        """
        self.models[t][i] = self._fit_stage_model(X, p_outcome, t, fresh_copy=True)

    def _fit_model_all_data(self, X: pd.DataFrame, p_outcome: np.ndarray, t: int) -> None:
        """
        Fit dtr model of stage t

        An estimator object is fitted in place (the instance given in
        model_info is the one stored in model_all).

        Parameters
        ----------
        X: pandas.dataframe
            input data (each row contains feature of each individual)
        p_outcome: np.ndarray
            pseudo outcome of stage t
        t: int
            stage

        Returns
        -------
        None
        """
        self.model_all[t] = self._fit_stage_model(X, p_outcome, t, fresh_copy=False)
StandardScaler 9 | from sklearn.pipeline import Pipeline 10 | 11 | from pydtr.iqlearn.regression import IqLearnReg 12 | 13 | 14 | class RegWrapperRule1(BaseEstimator, RegressorMixin): 15 | def __init__(self, thres: int, fit_value: int) -> None: 16 | self.thres = thres 17 | self.fit_value = fit_value 18 | 19 | def fit(self, X: pd.DataFrame, y: np.array): 20 | return self 21 | 22 | def predict(self, X): 23 | flag = X["L1"] < self.thres 24 | q_val = pd.Series(np.zeros(X.shape[0])) 25 | # if L1 >= thres; then q(L1, 1) > q(L1, 0); else q(L1, 1) < q(L1, 0) 26 | q_val[flag] = X.loc[flag, "A1"] * 2 + (1 - X.loc[flag, "A1"]) * 1 27 | q_val[~flag] = X.loc[~flag, "A1"] * 1 + (1 - X.loc[~flag, "A1"]) * 2 28 | return q_val 29 | 30 | 31 | class RegWrapperRule2(BaseEstimator, RegressorMixin): 32 | def __init__(self, thres: int, fit_value: int) -> None: 33 | self.thres = thres 34 | self.fit_value = fit_value 35 | 36 | def fit(self, X: pd.DataFrame, y: np.array): 37 | return self 38 | 39 | def predict(self, X: pd.DataFrame) -> pd.Series: 40 | flag = X["L1"] < self.thres 41 | q_val = pd.Series(np.zeros(X.shape[0])) 42 | # if L1 >= thres; then q(L1, 1) < q(L1, 0); else q(L1, 1) > q(L1, 0) 43 | q_val[flag] = X.loc[flag, "A2"] * 1 + (1 - X.loc[flag, "A2"]) * 2 44 | q_val[~flag] = X.loc[~flag, "A2"] * 2 + (1 - X.loc[~flag, "A2"]) * 1 45 | return q_val 46 | 47 | 48 | def test_iqlearn_regwrapper_rule(): 49 | # setup params 50 | n = 10 51 | thres = int(n / 2) 52 | # sample rule base models 53 | model1 = RegWrapperRule1(thres, 1) 54 | model2 = RegWrapperRule2(thres, 2) 55 | # sample dataframe 56 | df = pd.DataFrame() 57 | df["L1"] = np.arange(n) 58 | df["A1"] = [0, 1] * int(n / 2) 59 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2) 60 | df["Y1"] = np.zeros(n) 61 | df["Y2"] = np.zeros(n) 62 | # set model info 63 | model_info = [ 64 | { 65 | "model": model1, 66 | "action_dict": {"A1": [0, 1]}, 67 | "feature": ["L1", "A1"], 68 | "outcome": "Y1" 69 | }, 70 | { 71 | "model": model2, 72 | 
"action_dict": {"A2": [0, 1]}, 73 | "feature": ["L1", "A1", "Y1", "A2"], 74 | "outcome": "Y2" 75 | } 76 | ] 77 | # fit model (dummy) 78 | dtr_model = IqLearnReg( 79 | n_stages=2, 80 | model_info=model_info 81 | ) 82 | dtr_model.fit(df) 83 | assert dtr_model.model_all[0].fit_value == 1 84 | assert dtr_model.model_all[1].fit_value == 2 85 | # predict optimal atcions 86 | action_1 = dtr_model.predict(df, 0) 87 | action_2 = dtr_model.predict(df, 1) 88 | action_all = dtr_model.predict_all_stages(df) 89 | # stage 1 test 90 | true_action_1 = [1] * int(n / 2) + [0] * int(n / 2) 91 | assert all([a == b for a, b in zip(action_1["A1"].tolist(), true_action_1)]) 92 | # stage 2 test 93 | true_action_2 = [0] * int(n / 2) + [1] * int(n / 2) 94 | assert all([a == b for a, b in zip(action_2["A2"].tolist(), true_action_2)]) 95 | # all stage test 96 | assert action_all.shape[0] == action_1.shape[0] * 2 97 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True) 98 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True) 99 | assert_frame_equal(action_1, a1) 100 | assert_frame_equal(action_2, a2) 101 | # function test 102 | q_val_1 = dtr_model._get_max_val_df(dtr_model.model_all[0], df, 0) 103 | assert all([a == b for a, b in zip(q_val_1["A1"].tolist(), true_action_1)]) 104 | q_val_2 = dtr_model._get_max_val_df(dtr_model.model_all[1], df, 1) 105 | assert all([a == b for a, b in zip(q_val_2["A2"].tolist(), true_action_2)]) 106 | # fit bootstrap model (dummy) 107 | dtr_model = IqLearnReg( 108 | n_stages=2, 109 | model_info=model_info, 110 | n_bs=2 111 | ) 112 | dtr_model.fit(df) 113 | assert dtr_model.model_all[0].fit_value == 1 114 | assert dtr_model.model_all[1].fit_value == 2 115 | # check bootstrap model 116 | assert dtr_model.models[0][0].fit_value == 1 117 | assert dtr_model.models[1][0].fit_value == 2 118 | assert len(dtr_model.models[0]) == 2 119 | 120 | 121 | def test_iqlearn_rf(): 122 | # setup params 123 | n = 10 124 | thres = int(n / 2) 
125 | # rf models 126 | model1 = RandomForestRegressor() 127 | model2 = RandomForestRegressor() 128 | # sample dataframe 129 | df = pd.DataFrame() 130 | df["L1"] = np.arange(n) 131 | df["A1"] = [0, 1] * int(n / 2) 132 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2) 133 | df["Y1"] = np.zeros(n) 134 | df["Y2"] = np.zeros(n) 135 | # set model info 136 | model_info = [ 137 | { 138 | "model": model1, 139 | "action_dict": {"A1": [0, 1]}, 140 | "feature": ["L1", "A1"], 141 | "outcome": "Y1" 142 | }, 143 | { 144 | "model": model2, 145 | "action_dict": {"A2": [0, 1]}, 146 | "feature": ["L1", "A1", "Y1", "A2"], 147 | "outcome": "Y2" 148 | } 149 | ] 150 | # fit model 151 | dtr_model = IqLearnReg( 152 | n_stages=2, 153 | model_info=model_info 154 | ) 155 | dtr_model.fit(df) 156 | # predict optimal atcions 157 | action_1 = dtr_model.predict(df, 0) 158 | action_2 = dtr_model.predict(df, 1) 159 | action_all = dtr_model.predict_all_stages(df) 160 | # stage 1 test 161 | assert action_1.shape[0] == df.shape[0] 162 | # stage 2 test 163 | assert action_2.shape[0] == df.shape[0] 164 | # all stage test 165 | assert action_all.shape[0] == action_1.shape[0] * 2 166 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True) 167 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True) 168 | assert_frame_equal(action_1, a1) 169 | assert_frame_equal(action_2, a2) 170 | # fit bootstrap model 171 | dtr_model = IqLearnReg( 172 | n_stages=2, 173 | model_info=model_info, 174 | n_bs=2 175 | ) 176 | dtr_model.fit(df) 177 | assert len(dtr_model.models[0]) == 2 178 | 179 | 180 | def test_iqlearn_rf_multiple_actions(): 181 | # setup params 182 | n = 10 183 | thres = int(n / 2) 184 | # rf models 185 | model1 = RandomForestRegressor() 186 | model2 = RandomForestRegressor() 187 | # sample dataframe 188 | df = pd.DataFrame() 189 | df["L1"] = np.arange(n) 190 | df["A1"] = [0, 1] * int(n / 2) 191 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2) 192 | df["Y1"] = 
def test_iqlearn_rf_ordinalencoder():
    """Fit a two-stage IqLearnReg with random forests on integer-coded actions.

    Stage 1 offers actions ``[0, 1, 2]`` for ``A1`` and stage 2 offers
    ``[0, 1, 3]`` for ``A2``. The test checks that per-stage predictions have
    one row per input row, that ``predict_all_stages`` agrees with the
    per-stage ``predict`` calls, and that ``n_bs=2`` yields two fitted models
    for the first stage.
    """
    # setup params: n must be divisible by 3 so the action columns have n rows
    n = 30
    n_per_action = n // 3
    # rf models (one per stage)
    model1 = RandomForestRegressor()
    model2 = RandomForestRegressor()
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = np.arange(n)
    df["A1"] = [0, 1, 2] * n_per_action
    df["A2"] = [0] * n_per_action + [1] * n_per_action + [3] * n_per_action
    # outcomes are constant zeros; only shapes and consistency are asserted
    df["Y1"] = np.zeros(n)
    df["Y2"] = np.zeros(n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": [0, 1, 2]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A2": [0, 1, 3]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info
    )
    dtr_model.fit(df)
    # predict optimal actions
    action_1 = dtr_model.predict(df, 0)
    action_2 = dtr_model.predict(df, 1)
    action_all = dtr_model.predict_all_stages(df)
    # stage 1 test
    assert action_1.shape[0] == df.shape[0]
    # stage 2 test
    assert action_2.shape[0] == df.shape[0]
    # all stage test: the combined frame stacks both stages
    assert action_all.shape[0] == action_1.shape[0] * 2
    a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
    a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
    assert_frame_equal(action_1, a1)
    assert_frame_equal(action_2, a2)
    # fit bootstrap model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info,
        n_bs=2
    )
    dtr_model.fit(df)
    assert len(dtr_model.models[0]) == 2
"action_dict": {"A2": ["A", "C", "D"]}, 335 | "feature": ["L1", "A1", "Y1", "A2"], 336 | "outcome": "Y2" 337 | } 338 | ] 339 | # fit model 340 | dtr_model = IqLearnReg( 341 | n_stages=2, 342 | model_info=model_info 343 | ) 344 | dtr_model.fit(df) 345 | # predict optimal atcions 346 | action_1 = dtr_model.predict(df, 0) 347 | action_2 = dtr_model.predict(df, 1) 348 | action_all = dtr_model.predict_all_stages(df) 349 | # stage 1 test 350 | assert action_1.shape[0] == df.shape[0] 351 | # stage 2 test 352 | assert action_2.shape[0] == df.shape[0] 353 | # all stage test 354 | assert action_all.shape[0] == action_1.shape[0] * 2 355 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True) 356 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True) 357 | assert_frame_equal(action_1, a1) 358 | assert_frame_equal(action_2, a2) 359 | -------------------------------------------------------------------------------- /tests/test_iqlearn_sm_predict.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.base import BaseEstimator, RegressorMixin 4 | from pandas._testing import assert_frame_equal 5 | 6 | from pydtr.iqlearn.regression import IqLearnReg 7 | 8 | 9 | def test_iqlearn_sm(): 10 | # setup params 11 | n = 10 12 | thres = int(n / 2) 13 | # statsmodels 14 | model1 = "p_outcome ~ L1 * A1" 15 | model2 = "p_outcome ~ L1 + A1 + Y1 * A2" 16 | # sample dataframe 17 | df = pd.DataFrame() 18 | df["L1"] = np.arange(n) 19 | df["A1"] = [0, 1] * int(n / 2) 20 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2) 21 | df["Y1"] = np.zeros(n) 22 | df["Y2"] = np.zeros(n) 23 | # set model info 24 | model_info = [ 25 | { 26 | "model": model1, 27 | "action_dict": {"A1": [0, 1]}, 28 | "feature": ["L1", "A1"], 29 | "outcome": "Y1" 30 | }, 31 | { 32 | "model": model2, 33 | "action_dict": {"A2": [0, 1]}, 34 | "feature": ["L1", "A1", "Y1", "A2"], 35 | "outcome": "Y2" 36 | } 
def test_iqlearn_sm_multiple_actions():
    """Fit a two-stage IqLearnReg from formula strings with two stage-2 actions.

    The second stage lists both ``A1`` and ``A2`` in its ``action_dict``, so
    the stage-2 prediction frame is compared on the columns
    ``["A1", "A2", "val"]``. The test checks row counts, agreement between
    ``predict`` and ``predict_all_stages``, and that ``n_bs=2`` yields two
    fitted models for the first stage.
    """
    # setup params: n must be even so the action columns have n rows
    n = 10
    half = n // 2
    # formula strings (labelled "statsmodels" by the original author)
    model1 = "p_outcome ~ L1 * A1"
    model2 = "p_outcome ~ L1 + A1 + Y1 * A2"
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = np.arange(n)
    df["A1"] = [0, 1] * half
    df["A2"] = [0] * half + [1] * half
    # outcomes are constant zeros; only shapes and consistency are asserted
    df["Y1"] = np.zeros(n)
    df["Y2"] = np.zeros(n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": [0, 1]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A1": [0, 1], "A2": [0, 1]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info
    )
    dtr_model.fit(df)
    # predict optimal actions
    action_1 = dtr_model.predict(df, 0)
    action_2 = dtr_model.predict(df, 1)
    action_all = dtr_model.predict_all_stages(df)
    # stage 1 test
    assert action_1.shape[0] == df.shape[0]
    # stage 2 test
    assert action_2.shape[0] == df.shape[0]
    # all stage test: the combined frame stacks both stages
    assert action_all.shape[0] == action_1.shape[0] * 2
    a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
    a2 = action_all.query("stage == 1")[["A1", "A2", "val"]].reset_index(drop=True)
    assert_frame_equal(action_1, a1)
    assert_frame_equal(action_2, a2)
    # fit bootstrap model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info,
        n_bs=2
    )
    dtr_model.fit(df)
    assert len(dtr_model.models[0]) == 2
def test_iqlearn_sm_get_params():
    """Fit a bootstrapped two-stage IqLearnReg and inspect ``get_params``.

    Noisy outcomes are generated from ``L1``, ``A1`` and ``A2`` so the fitted
    coefficients are non-trivial. The test passes when at least one of the
    inspected coefficients takes more than one distinct value in the frame
    returned by ``get_params`` and when ``n_bs=10`` yields ten fitted models
    for the first stage.
    """
    # setup params: n must be even so the action columns have n rows
    n = 300
    half = n // 2
    # Seeded local generator so the fixture is identical on every run without
    # touching numpy's global random state.
    # NOTE(review): any resampling done inside IqLearnReg is not seeded here.
    rng = np.random.RandomState(0)
    # formula strings (labelled "statsmodels" by the original author)
    model1 = "p_outcome ~ L1 * A1"
    model2 = "p_outcome ~ L1 + A1 + Y1 * A2"
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = rng.normal(0, size=n)
    df["A1"] = [0, 1] * half
    df["A2"] = [0] * half + [1] * half
    df["Y1"] = df["L1"] * df["A1"] + rng.normal(0, scale=5, size=n)
    df["Y2"] = df["A1"] + df["Y1"] * df["A2"] + rng.normal(0, scale=5, size=n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": [0, 1]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A1": [0, 1], "A2": [0, 1]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit bootstrap model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info,
        n_bs=10
    )
    dtr_model.fit(df)
    # get params
    params = dtr_model.get_params()
    # number of distinct coefficient values per (stage, variable);
    # presumably one row per bootstrap fit — verify against get_params
    l1_unique_shape = params.query("stage == 0 & variable == 'L1'")["value"].unique().shape[0]
    a1_unique_shape = params.query("stage == 0 & variable == 'A1'")["value"].unique().shape[0]
    a2_unique_shape = params.query("stage == 1 & variable == 'A2'")["value"].unique().shape[0]
    assert l1_unique_shape != 1 or a1_unique_shape != 1 or a2_unique_shape != 1
    assert len(dtr_model.models[0]) == 10
--------------------------------------------------------------------------------