├── .circleci
│   └── config.yml
├── .github
│   └── CODEOWNERS
├── .gitignore
├── LICENSE
├── MANIFEST.IN
├── Makefile
├── README.md
├── examples
│   └── .gitkeep
├── setup.cfg
├── setup.py
├── src
│   └── pydtr
│       ├── __init__.py
│       ├── iqlearn
│       │   ├── __init__.py
│       │   ├── base.py
│       │   └── regression.py
│       └── version.py
└── tests
    ├── test_iqlearn_sklearn_predict.py
    └── test_iqlearn_sm_predict.py
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2.1
2 | orbs:
3 | codecov: codecov/codecov@1.0.2
4 | jobs:
5 | build-and-test:
6 | docker:
7 | - image: circleci/python:3.6
8 | auth:
9 | username: fullflu
10 | password: $DOCKERHUB_PASSWORD
11 | steps:
12 | - checkout
13 | - restore_cache:
14 | key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }}
15 | - run:
16 | name: Setup testing environment
17 | command: |
18 | python3 -m venv venv
19 | . venv/bin/activate
20 | make dev
21 | - save_cache:
22 | key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }}
23 | paths:
24 | - "venv"
25 | - run:
26 | name: Run Tests
27 | command: |
28 | . venv/bin/activate
29 | make test
30 | - codecov/upload:
31 | file: coverage.xml
32 | deploy:
33 | docker:
34 | - image: circleci/python:3.6
35 | auth:
36 | username: fullflu
37 | password: $DOCKERHUB_PASSWORD
38 | steps:
39 | - checkout
40 | - restore_cache:
41 | key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }}
42 | - run:
43 | name: install python dependencies
44 | command: |
45 | python3 -m venv venv
46 | . venv/bin/activate
47 | make dev
48 | - save_cache:
49 | key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }}
50 | paths:
51 | - "venv"
52 | - run:
53 | name: verify git tag vs. version
54 | command: |
55 | python3 -m venv venv
56 | . venv/bin/activate
57 | python setup.py verify
58 | - run:
59 | name: init .pypirc
60 | command: |
61 | echo -e "[pypi]" >> ~/.pypirc
62 | echo -e "username = fullflu" >> ~/.pypirc
63 | echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc
64 | - run:
65 | name: create packages
66 | command: |
67 | make package
68 | - run:
69 | name: upload to pypi
70 | command: |
71 | . venv/bin/activate
72 | twine upload --repository pypi dist/*
73 | workflows:
74 | version: 2
75 | workflow:
76 | jobs:
77 | - build-and-test:
78 | context: docker-hub-creds
79 | filters:
80 | tags:
81 | only: /.*/
82 | - deploy:
83 | context:
84 | - docker-hub-creds
85 | - pypi-publish
86 | requires:
87 | - build-and-test
88 | filters:
89 | branches:
90 | ignore: /.*/
91 | tags:
92 | only: /[0-9]+(\.[0-9]+)*/
93 |
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @fullflu
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # cache
2 | __pycache__
3 | .mypy_cache
4 |
5 | # Jupyter Notebook
6 | .ipynb_checkpoints
7 |
8 | # egg
9 | *.egg-info/
10 | .eggs/
11 |
12 | # python
13 | .python-version
14 |
15 | # test
16 | .coverage
17 | htmlcov/
18 |
19 | # env
20 | .env
21 | .envrc
22 |
23 | # build
24 | build/
25 | dist/
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2020, fullflu
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/MANIFEST.IN:
--------------------------------------------------------------------------------
1 | include README.md
2 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | dev:
2 | pip install --upgrade pip
3 | pip install -e ./.
4 | pip install category_encoders
5 | pip install pytest
6 | pip install coverage
7 | pip install twine
8 |
9 | package:
10 | python setup.py sdist
11 | python setup.py bdist_wheel
12 |
13 | test:
14 | coverage run --source=src/pydtr -m pytest
15 | coverage report
16 | coverage xml
17 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pydtr
2 |
3 | [CircleCI](https://app.circleci.com/pipelines/github/fullflu/pydtr)
4 | [codecov](https://codecov.io/gh/fullflu/pydtr)
5 | [License: BSD 3-Clause](https://opensource.org/licenses/BSD-3-Clause)
6 |
7 |
8 | ## Description
9 |
10 | `pydtr` is a Python library for dynamic treatment regimes ([DTR](https://en.wikipedia.org/wiki/Dynamic_treatment_regime)).
11 |
12 | A DTR is a paradigm that attempts to select optimal treatments adaptively for individual patients.
13 |
14 | `pydtr` enables you to implement DTR methods easily using sklearn-based interfaces.
15 |
16 | | Method | Single binary treatment | Multiple treatments | Multinomial treatment | Continuous treatment | Modeling flexibility | Interpretability |
17 | | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
18 | | IqLearnReg<br>(with sklearn) | :white_check_mark: | :white_check_mark: | :white_check_mark:<br>(with pipeline) | | :white_check_mark:<br>(with arbitrary regression models) | |
19 | | IqLearnReg<br>(with statsmodels) | :white_check_mark: | :white_check_mark: | :white_check_mark: | | limited to OLS | :white_check_mark:<br>(with confidence intervals) |
20 | | GEstimation | WIP | | WIP | WIP | WIP | WIP |
21 |
22 | `IqLearnReg` is a regression-based implementation of iterative Q-learning.
23 |
24 | When the data contain categorical independent variables and you use a sklearn model as the regression function, you need to encode those variables before fitting the model.
25 |
26 | We recommend encoding categorical variables with `category_encoders` and combining the encoder with the sklearn model via `sklearn.pipeline`.
27 |
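For example, a stage model could be built like this (a minimal sketch; the column name `A1` and the choice of `Ridge` are illustrative, and any sklearn-compatible regressor works):

```python
# minimal sketch: encode a categorical action column inside a pipeline
# (requires the category_encoders package)
import category_encoders as ce
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

stage_model = Pipeline([
    ("encode", ce.OneHotEncoder(cols=["A1"])),  # one-hot encode the categorical column
    ("scale", StandardScaler()),                # optional feature scaling
    ("model", Ridge())                          # any sklearn regressor
])
# pass stage_model as the "model" entry of the corresponding model_info element
```
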
28 | G-estimation, another well-known DTR method, is not available yet (work in progress).
29 |
30 | ## Requirements
31 |
32 | - python>=3.6
33 | - pandas>=1.1.2
34 | - scikit-learn>=0.23.2
35 | - numpy>=1.19.2
36 | - statsmodels>=0.12.0
37 |
38 | ## Installation
39 |
40 | ### From pypi
41 |
42 | ```
43 | pip install pydtr
44 | ```
45 |
46 | ### From source
47 |
48 | ```
49 | git clone https://github.com/fullflu/pydtr.git
50 | cd pydtr
51 | python setup.py install
52 | ```
53 |
54 | ## Usage
55 |
56 | ### Iterative Q Learning (IqLearnReg)
57 |
58 | You need to import libraries and prepare data.
59 |
60 | ```python
61 | # import
62 | import numpy as np
63 | import pandas as pd
64 | from sklearn.ensemble import RandomForestRegressor
65 |
66 | from pydtr.iqlearn.regression import IqLearnReg
67 |
68 | # create sample dataframe
69 | n = 10
70 | thres = int(n / 2)
71 | df = pd.DataFrame()
72 | df["L1"] = np.arange(n)
73 | df["A1"] = [0, 1] * int(n / 2)
74 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
75 | df["Y1"] = np.zeros(n)
76 | df["Y2"] = np.zeros(n)
77 | ```
78 |
79 | You can use sklearn-based models.
80 |
81 | ```python
82 | # set model info
83 | model_info = [
84 | {
85 | "model": RandomForestRegressor(),
86 | "action_dict": {"A1": [0, 1]},
87 | "feature": ["L1", "A1"],
88 | "outcome": "Y1"
89 | },
90 | {
91 | "model": RandomForestRegressor(),
92 | "action_dict": {"A2": [0, 1]},
93 | "feature": ["L1", "A1", "Y1", "A2"],
94 | "outcome": "Y2"
95 | }
96 | ]
97 | # fit model
98 | dtr_model = IqLearnReg(
99 | n_stages=2,
100 | model_info=model_info
101 | )
102 | dtr_model.fit(df)
103 |
104 | # predict optimal actions
105 | opt_action_stage_1 = dtr_model.predict(df, 0)
106 | opt_action_stage_2 = dtr_model.predict(df, 1)
107 | opt_action_all_stages = dtr_model.predict_all_stages(df)
108 | ```
109 |
110 | You can also use statsmodels-based models.
111 |
112 | ```python
113 | # set model info
114 | model_info = [
115 | {
116 | "model": "p_outcome ~ L1 * A1",
117 | "action_dict": {"A1": [0, 1]},
118 | "feature": ["L1", "A1"],
119 | "outcome": "Y1"
120 | },
121 | {
122 | "model": "p_outcome ~ L1 + A1 + Y1 * A2",
123 | "action_dict": {"A2": [0, 1]},
124 | "feature": ["L1", "A1", "Y1", "A2"],
125 | "outcome": "Y2"
126 | }
127 | ]
128 | # fit model
129 | dtr_model = IqLearnReg(
130 | n_stages=2,
131 | model_info=model_info
132 | )
133 | dtr_model.fit(df)
134 |
135 | # predict optimal actions
136 | opt_action_stage_1 = dtr_model.predict(df, 0)
137 | opt_action_stage_2 = dtr_model.predict(df, 1)
138 | opt_action_all_stages = dtr_model.predict_all_stages(df)
139 | ```
140 |
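When the models are statsmodels formulas, you can also inspect the estimated coefficients. The sketch below assumes the same `df` and `model_info` as above: it fits bootstrap replicates via the `n_bs` argument and collects their coefficients with `get_params`, whose spread gives a sense of each parameter's uncertainty.

```python
# fit bootstrap replicates (statsmodels formulas only) and inspect coefficients
dtr_model = IqLearnReg(
    n_stages=2,
    model_info=model_info,
    n_bs=10  # number of bootstrap samples
)
dtr_model.fit(df)

# long-format dataframe with columns "variable", "value", "stage":
# one row per bootstrap sample, stage, and coefficient
params = dtr_model.get_params()
print(params.query("stage == 0 & variable == 'L1'")["value"].describe())
```
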
141 | Please see the [examples](https://github.com/fullflu/pydtr/blob/master/examples/) for more information.
142 |
143 | ## Authors
144 |
145 | - [@fullflu](https://github.com/fullflu)
146 |
147 | ## Contributors
148 |
149 | Please feel free to create issues or send pull requests!
150 |
151 | If all checks pass on a pull request, I will merge and release it.
152 |
153 | ## License
154 |
155 | [BSD](https://github.com/fullflu/pydtr/blob/master/LICENSE)
156 |
157 |
158 | ## Structure
159 |
160 | ```
161 | ├── .circleci
162 | │ ├── config.yml
163 | ├── .github
164 | │ ├── CODEOWNERS
165 | ├── LICENSE
166 | ├── MANIFEST.IN
167 | ├── Makefile
168 | ├── README.md
169 | ├── examples
170 | │ ├── ...several notebooks...
171 | ├── setup.cfg
172 | ├── setup.py
173 | ├── src
174 | │ ├── pydtr
175 | │ │ ├── __init__.py
176 | │ │ └── iqlearn
177 | │ │ ├── __init__.py
178 | │ │ ├── base.py
179 | │ │ └── regression.py
180 | └── tests
181 | ├── test_iqlearn_sklearn_predict.py
182 | └── test_iqlearn_sm_predict.py
183 | ```
184 |
185 | ## References
186 |
187 | - Chakraborty, B, Moodie, EE. *Statistical Methods for Dynamic Treatment Regimes.* Springer, New York, 2013.
188 |
--------------------------------------------------------------------------------
/examples/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fullflu/pydtr/8fbf836b2871ddfe4298a76c1257bfd06b0953ed/examples/.gitkeep
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import os
5 | import sys
6 |
7 | from setuptools import setup
8 | from setuptools import find_packages
9 | from setuptools.command.install import install
10 | from codecs import open
11 |
12 | from src.pydtr.version import __version__
13 |
14 | here = os.path.abspath(os.path.dirname(__file__))
15 |
16 | with open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
17 | long_description = f.read()
18 |
19 |
20 | class VerifyVersionCommand(install):
21 | """Custom command to verify that the git tag matches our version"""
22 | description = 'verify that the git tag matches our version'
23 |
24 | def run(self):
25 | tag = os.getenv('CIRCLE_TAG')
26 |
27 | if tag != __version__:
28 | info = "Git tag: {0} does not match the version of this app: {1}".format(
29 | tag, __version__
30 | )
31 | sys.exit(info)
32 |
33 |
34 | setup(
35 | name='pydtr',
36 | version=__version__,
37 | description='Python library of Dynamic Treatment Regimes',
38 | long_description=long_description,
39 | long_description_content_type='text/markdown',
40 | url='https://github.com/fullflu/pydtr',
41 | author='fullflu',
42 | author_email='k.takayama0902@gmail.com',
43 | license='BSD',
44 | install_requires=[
45 | 'pandas>=1.1.2',
46 | 'scikit-learn>=0.23.2',
47 | 'numpy>=1.19.2',
48 | 'statsmodels>=0.12.0'
49 | ],
50 | keywords=['dynamic treatment regimes', 'reinforcement learning', 'dtr'],
51 | include_package_data=True,
52 | package_dir={'': "src"},
53 | packages=find_packages('src'),
54 | classifiers=[
55 | 'Topic :: Scientific/Engineering :: Artificial Intelligence',
56 | 'License :: OSI Approved :: BSD License',
57 | 'Programming Language :: Python :: 3',
58 | 'Programming Language :: Python :: 3.6',
59 | ],
60 | setup_requires=["pytest-runner"],
61 | tests_require=["pytest", "pytest-cov", "coverage", "category_encoders"],
62 | cmdclass={
63 | 'verify': VerifyVersionCommand,
64 | }
65 | )
66 |
--------------------------------------------------------------------------------
/src/pydtr/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__
2 |
--------------------------------------------------------------------------------
/src/pydtr/iqlearn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fullflu/pydtr/8fbf836b2871ddfe4298a76c1257bfd06b0953ed/src/pydtr/iqlearn/__init__.py
--------------------------------------------------------------------------------
/src/pydtr/iqlearn/base.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from typing import Dict
3 | from abc import ABCMeta, abstractmethod
4 |
5 |
6 | import pandas as pd
7 | import numpy as np
8 | from sklearn.utils import resample
9 |
10 |
11 | class IqLearnBase(metaclass=ABCMeta):
12 | """
13 | Base class of iterative q learning
14 |
15 | Attributes
16 | ----------
17 | n_stages : int
18 | the number of total stages
19 | model_info : list of dict
20 | list of model information dictionary of each stage
21 | - model: string or sklearn-based instance
22 | model object
23 | - action_dict: dict
24 | action variable -> domain of the variable
25 | - feature: list of str
26 | feature variable names
27 | - outcome: str
28 | an outcome variable name
29 | n_bs : int
30 | the number of bootstrap sampling (0 means no bootstrap)
31 | """
32 | def __init__(self, n_stages: int, model_info: list, n_bs: int = 0) -> None:
33 | """
34 | Parameters
35 | ----------
36 | n_stages : int
37 | The number of total stages
38 | model_info : list of dict
39 | List of model information dictionary of each stage
40 | - model: string or sklearn-based instance
41 | Model object
42 | - action_dict: dict
43 | Action variable -> domain of the variable
44 | - feature: list of str
45 | Feature variable names
46 | - outcome: str
47 | An outcome variable name
48 | n_bs : int
49 | The number of bootstrap sampling (0 means no bootstrap)
50 | """
51 | assert n_stages == len(model_info), "n_stages must be the same as the length of model_info"
52 | self.n_stages = n_stages
53 | self.model_info = model_info
54 | self.n_bs = n_bs
55 | self._init_set()
56 |
57 | def _get_max_val_df(self, model, X: pd.DataFrame, t: int) -> pd.DataFrame:
58 | """
59 | Get maximum value of Q function of stage t (ref: max_{A} Q(X, A))
60 | """
61 | action_dict = self.model_info[t]["action_dict"]
62 | keys = list(action_dict.keys())
63 | # dataframe to store pseudo outcomes
64 | max_val_df = pd.DataFrame(columns=keys)
65 | # initialize maximum value
66 | max_val_df["val"] = [-1 * np.inf] * X.shape[0]
67 | tmp_df = X.reset_index(drop=True).copy()
68 | for tmp_dict in [dict(zip(action_dict, vs)) for vs in itertools.product(*action_dict.values())]:
69 | # consider multiple action variables
70 | tmp_keys = list(tmp_dict.keys())
71 | tmp_values = list(tmp_dict.values())
72 | if len(tmp_values) == 1:
73 | tmp_values = tmp_values[0]
74 | # raise ValueError
75 | # set action values
76 | tmp_df[tmp_keys] = tmp_values
77 | # predict q value for the action values
78 | val_a = model.predict(tmp_df)
79 | # update optimal actions and q values
80 | flag = max_val_df["val"] < val_a
81 | max_val_df.loc[flag.values, tmp_keys] = tmp_values
82 | max_val_df.loc[flag.values, "val"] = val_a[flag.values]
83 | # reset independent variables
84 | tmp_df = X.reset_index(drop=True).copy()
85 | return max_val_df
86 |
87 | def _get_p_outcome(self, model: object, X: pd.DataFrame, y: pd.Series, t: int) -> pd.Series:
88 | """
89 | Get pseudo-outcome of stage t
90 | """
91 | # return pseudo outcome
92 | max_val_df = self._get_max_val_df(model, X, t + 1)
93 | return max_val_df["val"].values + y.values
94 |
95 | @staticmethod
96 | def _sample_bs(df: pd.DataFrame, size_bs: int = -1) -> pd.DataFrame:
97 | """
98 | bootstrap sampling
99 | """
100 | # bootstrap sampling
101 | if size_bs == -1:
102 | size_bs = df.shape[0]
103 | return resample(df, n_samples=size_bs)
104 |
105 | def fit(self, df: pd.DataFrame):
106 | """
107 | Fit dtr models
108 |
109 | Parameters
110 | ----------
111 | df: pandas.dataframe
112 | input data (each row contains all stage information of each individual)
113 |
114 | Returns
115 | -------
116 | self
117 | """
118 | # fit models using all data
119 | for t in reversed(range(self.n_stages)):
120 | print("Stage: {}".format(t))
121 | X = df[self.model_info[t]["feature"]]
122 | y = df[self.model_info[t]["outcome"]]
123 | if t == self.n_stages - 1:
124 | p_outcome = y.values
125 | else:
126 | X2 = df[self.model_info[t + 1]["feature"]]
127 | y2 = df[self.model_info[t + 1]["outcome"]]
128 | p_outcome = self._get_p_outcome(self.model_all[t + 1], X2, y2, t)
129 | self._fit_model_all_data(X, p_outcome, t)
130 |
131 | # fit models using bootstrap
132 | for i in range(self.n_bs):
133 | df_i = self._sample_bs(df)
134 | for t in reversed(range(self.n_stages)):
135 | # extract feature and outcome
136 | X = df_i[self.model_info[t]["feature"]]
137 | y = df_i[self.model_info[t]["outcome"]]
138 | if t == self.n_stages - 1:
139 | p_outcome = y.values
140 | else:
141 | X2 = df_i[self.model_info[t + 1]["feature"]]
142 | y2 = df_i[self.model_info[t + 1]["outcome"]]
143 | p_outcome = self._get_p_outcome(self.model_all[t + 1], X2, y2, t)
144 | # fit model of stage t
145 | self._fit_model(X, p_outcome, t, i)
146 | return self
147 |
148 | @abstractmethod
149 | def _fit_model(self, X: pd.DataFrame, p_outcome: np.array, t: int, i: int) -> None:
150 | pass
151 |
152 | @abstractmethod
153 | def _fit_model_all_data(self, X: pd.DataFrame, p_outcome: np.array, t: int) -> None:
154 | pass
155 |
156 | def _init_set(self) -> None:
157 | self.model_all = {}
158 | self.models = {}
159 | for t in range(self.n_stages):
160 | # TODO: check model type
161 | # self._check_model_type(self.model_info[t]["model"])
162 | self.model_all[t] = None
163 | if self.n_bs:
164 | self.models[t] = np.array([None] * (self.n_bs))
165 |
166 | def get_params(self) -> pd.DataFrame:
167 | # get estimated parameters
168 | params = pd.DataFrame()
169 | for t in reversed(range(self.n_stages)):
170 | if type(self.model_info[t]["model"]) == str:
171 | tmp_df = pd.melt(pd.DataFrame([i.params for i in self.models[t]]))
172 | tmp_df["stage"] = t
173 | params = pd.concat([params, tmp_df])
174 | return params
175 |
176 | def predict(self, df: pd.DataFrame, t: int) -> pd.DataFrame:
177 | """
178 | Predict optimal treatment of stage t for each row
179 |
180 | Parameters
181 | ----------
182 | df: pandas.dataframe
183 | input data (each row contains all stage information of each individual)
184 | t: int
185 | stage
186 |
187 | Returns
188 | -------
189 | max_val_df: pandas.dataframe
190 | optimal treatments and their q-values
191 | """
192 | # return optimal actions for the specified stage
193 | X = df[self.model_info[t]["feature"]].copy()
194 | max_val_df = self._get_max_val_df(self.model_all[t], X, t)
195 | return max_val_df
196 |
197 | def predict_all_stages(self, df: pd.DataFrame) -> pd.DataFrame:
198 | """
199 | Predict optimal treatment of all stages for each row
200 |
201 | Parameters
202 | ----------
203 | df: pandas.dataframe
204 | input data (each row contains all stage information of each individual)
207 |
208 | Returns
209 | -------
210 | max_df: pandas.dataframe
211 | optimal treatments, their q-values, and stage information
212 | """
213 | # return optimal actions for all stages
214 | max_df = pd.DataFrame()
215 | for t in range(self.n_stages):
216 | X = df[self.model_info[t]["feature"]]
217 | max_val_df = self._get_max_val_df(self.model_all[t], X, t)
218 | max_val_df["stage"] = t
219 | max_val_df["row_id"] = np.arange(X.shape[0])
220 | max_df = pd.concat([max_df, max_val_df], sort=True).reset_index(drop=True)
221 | return max_df
222 |
--------------------------------------------------------------------------------
/src/pydtr/iqlearn/regression.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import statsmodels.formula.api as smf
4 |
5 | from .base import IqLearnBase
6 |
7 |
8 | class IqLearnReg(IqLearnBase):
9 | def _fit_model(self, X: pd.DataFrame, p_outcome: np.array, t: int, i: int) -> None:
10 | """
11 | Fit dtr model of stage t and bootstrap i
12 |
13 | Parameters
14 | ----------
15 | X: pandas.dataframe
16 | input data (each row contains feature of each individual)
17 | p_outcome: np.array
18 | pseudo outcome of stage t
19 | t: int
20 | stage
21 | i: int
22 | bootstrap index
23 |
24 | Returns
25 | -------
26 | None
27 | """
28 | if type(self.model_info[t]["model"]) == str:
29 | df = X.copy()
30 | df["p_outcome"] = p_outcome
31 | self.models[t][i] = smf.ols(formula=self.model_info[t]["model"], data=df).fit()
32 | else:
33 | self.models[t][i] = self.model_info[t]["model"].fit(X, p_outcome)
34 |
35 | def _fit_model_all_data(self, X: pd.DataFrame, p_outcome: np.array, t: int) -> None:
36 | """
37 | Fit dtr model of stage t
38 |
39 | Parameters
40 | ----------
41 | X: pandas.dataframe
42 | input data (each row contains feature of each individual)
43 | p_outcome: np.array
44 | pseudo outcome of stage t
45 | t: int
46 | stage
47 |
48 | Returns
49 | -------
50 | None
51 | """
52 | if type(self.model_info[t]["model"]) == str:
53 | df = X.copy()
54 | df["p_outcome"] = p_outcome
55 | self.model_all[t] = smf.ols(formula=self.model_info[t]["model"], data=df).fit()
56 | else:
57 | self.model_all[t] = self.model_info[t]["model"].fit(X, p_outcome)
58 |
--------------------------------------------------------------------------------
/src/pydtr/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.0.2"
2 |
--------------------------------------------------------------------------------
/tests/test_iqlearn_sklearn_predict.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import category_encoders as ce
4 | from sklearn.base import BaseEstimator, RegressorMixin
5 | from sklearn.ensemble import RandomForestRegressor
6 | from pandas.testing import assert_frame_equal
7 | from sklearn.linear_model import Ridge
8 | from sklearn.preprocessing import StandardScaler
9 | from sklearn.pipeline import Pipeline
10 |
11 | from pydtr.iqlearn.regression import IqLearnReg
12 |
13 |
14 | class RegWrapperRule1(BaseEstimator, RegressorMixin):
15 | def __init__(self, thres: int, fit_value: int) -> None:
16 | self.thres = thres
17 | self.fit_value = fit_value
18 |
19 | def fit(self, X: pd.DataFrame, y: np.array):
20 | return self
21 |
22 | def predict(self, X):
23 | flag = X["L1"] < self.thres
24 | q_val = pd.Series(np.zeros(X.shape[0]))
25 | # if L1 < thres, then q(L1, 1) > q(L1, 0); else q(L1, 1) < q(L1, 0)
26 | q_val[flag] = X.loc[flag, "A1"] * 2 + (1 - X.loc[flag, "A1"]) * 1
27 | q_val[~flag] = X.loc[~flag, "A1"] * 1 + (1 - X.loc[~flag, "A1"]) * 2
28 | return q_val
29 |
30 |
31 | class RegWrapperRule2(BaseEstimator, RegressorMixin):
32 | def __init__(self, thres: int, fit_value: int) -> None:
33 | self.thres = thres
34 | self.fit_value = fit_value
35 |
36 | def fit(self, X: pd.DataFrame, y: np.array):
37 | return self
38 |
39 | def predict(self, X: pd.DataFrame) -> pd.Series:
40 | flag = X["L1"] < self.thres
41 | q_val = pd.Series(np.zeros(X.shape[0]))
42 | # if L1 < thres, then q(L1, 1) < q(L1, 0); else q(L1, 1) > q(L1, 0)
43 | q_val[flag] = X.loc[flag, "A2"] * 1 + (1 - X.loc[flag, "A2"]) * 2
44 | q_val[~flag] = X.loc[~flag, "A2"] * 2 + (1 - X.loc[~flag, "A2"]) * 1
45 | return q_val
46 |
47 |
48 | def test_iqlearn_regwrapper_rule():
49 | # setup params
50 | n = 10
51 | thres = int(n / 2)
52 | # sample rule base models
53 | model1 = RegWrapperRule1(thres, 1)
54 | model2 = RegWrapperRule2(thres, 2)
55 | # sample dataframe
56 | df = pd.DataFrame()
57 | df["L1"] = np.arange(n)
58 | df["A1"] = [0, 1] * int(n / 2)
59 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
60 | df["Y1"] = np.zeros(n)
61 | df["Y2"] = np.zeros(n)
62 | # set model info
63 | model_info = [
64 | {
65 | "model": model1,
66 | "action_dict": {"A1": [0, 1]},
67 | "feature": ["L1", "A1"],
68 | "outcome": "Y1"
69 | },
70 | {
71 | "model": model2,
72 | "action_dict": {"A2": [0, 1]},
73 | "feature": ["L1", "A1", "Y1", "A2"],
74 | "outcome": "Y2"
75 | }
76 | ]
77 | # fit model (dummy)
78 | dtr_model = IqLearnReg(
79 | n_stages=2,
80 | model_info=model_info
81 | )
82 | dtr_model.fit(df)
83 | assert dtr_model.model_all[0].fit_value == 1
84 | assert dtr_model.model_all[1].fit_value == 2
85 | # predict optimal actions
86 | action_1 = dtr_model.predict(df, 0)
87 | action_2 = dtr_model.predict(df, 1)
88 | action_all = dtr_model.predict_all_stages(df)
89 | # stage 1 test
90 | true_action_1 = [1] * int(n / 2) + [0] * int(n / 2)
91 | assert all([a == b for a, b in zip(action_1["A1"].tolist(), true_action_1)])
92 | # stage 2 test
93 | true_action_2 = [0] * int(n / 2) + [1] * int(n / 2)
94 | assert all([a == b for a, b in zip(action_2["A2"].tolist(), true_action_2)])
95 | # all stage test
96 | assert action_all.shape[0] == action_1.shape[0] * 2
97 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
98 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
99 | assert_frame_equal(action_1, a1)
100 | assert_frame_equal(action_2, a2)
101 | # function test
102 | q_val_1 = dtr_model._get_max_val_df(dtr_model.model_all[0], df, 0)
103 | assert all([a == b for a, b in zip(q_val_1["A1"].tolist(), true_action_1)])
104 | q_val_2 = dtr_model._get_max_val_df(dtr_model.model_all[1], df, 1)
105 | assert all([a == b for a, b in zip(q_val_2["A2"].tolist(), true_action_2)])
106 | # fit bootstrap model (dummy)
107 | dtr_model = IqLearnReg(
108 | n_stages=2,
109 | model_info=model_info,
110 | n_bs=2
111 | )
112 | dtr_model.fit(df)
113 | assert dtr_model.model_all[0].fit_value == 1
114 | assert dtr_model.model_all[1].fit_value == 2
115 | # check bootstrap model
116 | assert dtr_model.models[0][0].fit_value == 1
117 | assert dtr_model.models[1][0].fit_value == 2
118 | assert len(dtr_model.models[0]) == 2
119 |
120 |
121 | def test_iqlearn_rf():
122 | # setup params
123 | n = 10
124 | thres = int(n / 2)
125 | # rf models
126 | model1 = RandomForestRegressor()
127 | model2 = RandomForestRegressor()
128 | # sample dataframe
129 | df = pd.DataFrame()
130 | df["L1"] = np.arange(n)
131 | df["A1"] = [0, 1] * int(n / 2)
132 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
133 | df["Y1"] = np.zeros(n)
134 | df["Y2"] = np.zeros(n)
135 | # set model info
136 | model_info = [
137 | {
138 | "model": model1,
139 | "action_dict": {"A1": [0, 1]},
140 | "feature": ["L1", "A1"],
141 | "outcome": "Y1"
142 | },
143 | {
144 | "model": model2,
145 | "action_dict": {"A2": [0, 1]},
146 | "feature": ["L1", "A1", "Y1", "A2"],
147 | "outcome": "Y2"
148 | }
149 | ]
150 | # fit model
151 | dtr_model = IqLearnReg(
152 | n_stages=2,
153 | model_info=model_info
154 | )
155 | dtr_model.fit(df)
156 | # predict optimal actions
157 | action_1 = dtr_model.predict(df, 0)
158 | action_2 = dtr_model.predict(df, 1)
159 | action_all = dtr_model.predict_all_stages(df)
160 | # stage 1 test
161 | assert action_1.shape[0] == df.shape[0]
162 | # stage 2 test
163 | assert action_2.shape[0] == df.shape[0]
164 | # all stage test
165 | assert action_all.shape[0] == action_1.shape[0] * 2
166 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
167 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
168 | assert_frame_equal(action_1, a1)
169 | assert_frame_equal(action_2, a2)
170 | # fit bootstrap model
171 | dtr_model = IqLearnReg(
172 | n_stages=2,
173 | model_info=model_info,
174 | n_bs=2
175 | )
176 | dtr_model.fit(df)
177 | assert len(dtr_model.models[0]) == 2
178 |
179 |
180 | def test_iqlearn_rf_multiple_actions():
181 | # setup params
182 | n = 10
183 | thres = int(n / 2)
184 | # rf models
185 | model1 = RandomForestRegressor()
186 | model2 = RandomForestRegressor()
187 | # sample dataframe
188 | df = pd.DataFrame()
189 | df["L1"] = np.arange(n)
190 | df["A1"] = [0, 1] * int(n / 2)
191 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
192 | df["Y1"] = np.zeros(n)
193 | df["Y2"] = np.zeros(n)
194 | # set model info
195 | model_info = [
196 | {
197 | "model": model1,
198 | "action_dict": {"A1": [0, 1]},
199 | "feature": ["L1", "A1"],
200 | "outcome": "Y1"
201 | },
202 | {
203 | "model": model2,
204 | "action_dict": {"A1": [0, 1], "A2": [0, 1]},
205 | "feature": ["L1", "A1", "Y1", "A2"],
206 | "outcome": "Y2"
207 | }
208 | ]
209 | # fit model
210 | dtr_model = IqLearnReg(
211 | n_stages=2,
212 | model_info=model_info
213 | )
214 | dtr_model.fit(df)
215 | # predict optimal actions
216 | action_1 = dtr_model.predict(df, 0)
217 | action_2 = dtr_model.predict(df, 1)
218 | action_all = dtr_model.predict_all_stages(df)
219 | # stage 1 test
220 | assert action_1.shape[0] == df.shape[0]
221 | # stage 2 test
222 | assert action_2.shape[0] == df.shape[0]
223 | # all stage test
224 | assert action_all.shape[0] == action_1.shape[0] * 2
225 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
226 | a2 = action_all.query("stage == 1")[["A1", "A2", "val"]].reset_index(drop=True)
227 | assert_frame_equal(action_1, a1)
228 | assert_frame_equal(action_2, a2)
229 | # fit bootstrap model
230 | dtr_model = IqLearnReg(
231 | n_stages=2,
232 | model_info=model_info,
233 | n_bs=2
234 | )
235 | dtr_model.fit(df)
236 | assert len(dtr_model.models[0]) == 2
237 |
238 |
239 | def test_iqlearn_rf_ordinalencoder():
240 | # setup params
241 | n = 30
242 | thres = int(n / 2)
243 | # rf models
244 | model1 = RandomForestRegressor()
245 | model2 = RandomForestRegressor()
246 | # sample dataframe
247 | df = pd.DataFrame()
248 | df["L1"] = np.arange(n)
249 | df["A1"] = [0, 1, 2] * int(n / 3)
250 | df["A2"] = [0] * int(n / 3) + [1] * int(n / 3) + [3] * int(n / 3)
251 | df["Y1"] = np.zeros(n)
252 | df["Y2"] = np.zeros(n)
253 | # set model info
254 | model_info = [
255 | {
256 | "model": model1,
257 | "action_dict": {"A1": [0, 1, 2]},
258 | "feature": ["L1", "A1"],
259 | "outcome": "Y1"
260 | },
261 | {
262 | "model": model2,
263 | "action_dict": {"A2": [0, 1, 3]},
264 | "feature": ["L1", "A1", "Y1", "A2"],
265 | "outcome": "Y2"
266 | }
267 | ]
268 | # fit model
269 | dtr_model = IqLearnReg(
270 | n_stages=2,
271 | model_info=model_info
272 | )
273 | dtr_model.fit(df)
274 | # predict optimal actions
275 | action_1 = dtr_model.predict(df, 0)
276 | action_2 = dtr_model.predict(df, 1)
277 | action_all = dtr_model.predict_all_stages(df)
278 | # stage 1 test
279 | assert action_1.shape[0] == df.shape[0]
280 | # stage 2 test
281 | assert action_2.shape[0] == df.shape[0]
282 | # all stage test
283 | assert action_all.shape[0] == action_1.shape[0] * 2
284 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
285 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
286 | assert_frame_equal(action_1, a1)
287 | assert_frame_equal(action_2, a2)
288 | # fit bootstrap model
289 | dtr_model = IqLearnReg(
290 | n_stages=2,
291 | model_info=model_info,
292 | n_bs=2
293 | )
294 | dtr_model.fit(df)
295 | assert len(dtr_model.models[0]) == 2
296 |
297 |
298 | def test_iqlearn_pipeline_category_encoder():
299 | # setup params
300 | n = 30
301 | thres = int(n / 2)
302 | # statsmodels
303 | model1 = Pipeline(
304 | [
305 | ("ce0", ce.OneHotEncoder(cols=["A1"])),
306 | ("scale", StandardScaler()),
307 | ("model", Ridge())
308 | ]
309 | )
310 | model2 = Pipeline(
311 | [
312 | ("ce0", ce.OneHotEncoder(cols=["A1", "A2"])),
313 | ("scale", StandardScaler()),
314 | ("model", Ridge())
315 | ]
316 | )
317 | # sample dataframe
318 | df = pd.DataFrame()
319 | df["L1"] = np.arange(n)
320 | df["A1"] = ["A", "B", "C"] * int(n / 3)
321 | df["A2"] = ["A"] * int(n / 3) + ["C"] * int(n / 3) + ["D"] * int(n / 3)
322 | df["Y1"] = np.zeros(n)
323 | df["Y2"] = np.zeros(n)
324 | # set model info
325 | model_info = [
326 | {
327 | "model": model1,
328 | "action_dict": {"A1": ["A", "B", "C"]},
329 | "feature": ["L1", "A1"],
330 | "outcome": "Y1"
331 | },
332 | {
333 | "model": model2,
334 | "action_dict": {"A2": ["A", "C", "D"]},
335 | "feature": ["L1", "A1", "Y1", "A2"],
336 | "outcome": "Y2"
337 | }
338 | ]
339 | # fit model
340 | dtr_model = IqLearnReg(
341 | n_stages=2,
342 | model_info=model_info
343 | )
344 | dtr_model.fit(df)
345 | # predict optimal actions
346 | action_1 = dtr_model.predict(df, 0)
347 | action_2 = dtr_model.predict(df, 1)
348 | action_all = dtr_model.predict_all_stages(df)
349 | # stage 1 test
350 | assert action_1.shape[0] == df.shape[0]
351 | # stage 2 test
352 | assert action_2.shape[0] == df.shape[0]
353 | # all stage test
354 | assert action_all.shape[0] == action_1.shape[0] * 2
355 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
356 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
357 | assert_frame_equal(action_1, a1)
358 | assert_frame_equal(action_2, a2)
359 |
--------------------------------------------------------------------------------
/tests/test_iqlearn_sm_predict.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from sklearn.base import BaseEstimator, RegressorMixin
4 | from pandas.testing import assert_frame_equal
5 |
6 | from pydtr.iqlearn.regression import IqLearnReg
7 |
8 |
9 | def test_iqlearn_sm():
10 | # setup params
11 | n = 10
12 | thres = int(n / 2)
13 | # statsmodels
14 | model1 = "p_outcome ~ L1 * A1"
15 | model2 = "p_outcome ~ L1 + A1 + Y1 * A2"
16 | # sample dataframe
17 | df = pd.DataFrame()
18 | df["L1"] = np.arange(n)
19 | df["A1"] = [0, 1] * int(n / 2)
20 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
21 | df["Y1"] = np.zeros(n)
22 | df["Y2"] = np.zeros(n)
23 | # set model info
24 | model_info = [
25 | {
26 | "model": model1,
27 | "action_dict": {"A1": [0, 1]},
28 | "feature": ["L1", "A1"],
29 | "outcome": "Y1"
30 | },
31 | {
32 | "model": model2,
33 | "action_dict": {"A2": [0, 1]},
34 | "feature": ["L1", "A1", "Y1", "A2"],
35 | "outcome": "Y2"
36 | }
37 | ]
38 | # fit model
39 | dtr_model = IqLearnReg(
40 | n_stages=2,
41 | model_info=model_info
42 | )
43 | dtr_model.fit(df)
44 | # predict optimal actions
45 | action_1 = dtr_model.predict(df, 0)
46 | action_2 = dtr_model.predict(df, 1)
47 | action_all = dtr_model.predict_all_stages(df)
48 | # stage 1 test
49 | assert action_1.shape[0] == df.shape[0]
50 | # stage 2 test
51 | assert action_2.shape[0] == df.shape[0]
52 | # all stage test
53 | assert action_all.shape[0] == action_1.shape[0] * 2
54 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
55 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
56 | assert_frame_equal(action_1, a1)
57 | assert_frame_equal(action_2, a2)
58 | # fit bootstrap model
59 | dtr_model = IqLearnReg(
60 | n_stages=2,
61 | model_info=model_info,
62 | n_bs=2
63 | )
64 | dtr_model.fit(df)
65 | assert len(dtr_model.models[0]) == 2
66 |
67 |
68 | def test_iqlearn_sm_multiple_actions():
69 | # setup params
70 | n = 10
71 | thres = int(n / 2)
72 | # statsmodels
73 | model1 = "p_outcome ~ L1 * A1"
74 | model2 = "p_outcome ~ L1 + A1 + Y1 * A2"
75 | # sample dataframe
76 | df = pd.DataFrame()
77 | df["L1"] = np.arange(n)
78 | df["A1"] = [0, 1] * int(n / 2)
79 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
80 | df["Y1"] = np.zeros(n)
81 | df["Y2"] = np.zeros(n)
82 | # set model info
83 | model_info = [
84 | {
85 | "model": model1,
86 | "action_dict": {"A1": [0, 1]},
87 | "feature": ["L1", "A1"],
88 | "outcome": "Y1"
89 | },
90 | {
91 | "model": model2,
92 | "action_dict": {"A1": [0, 1], "A2": [0, 1]},
93 | "feature": ["L1", "A1", "Y1", "A2"],
94 | "outcome": "Y2"
95 | }
96 | ]
97 | # fit model
98 | dtr_model = IqLearnReg(
99 | n_stages=2,
100 | model_info=model_info
101 | )
102 | dtr_model.fit(df)
103 | # predict optimal actions
104 | action_1 = dtr_model.predict(df, 0)
105 | action_2 = dtr_model.predict(df, 1)
106 | action_all = dtr_model.predict_all_stages(df)
107 | # stage 1 test
108 | assert action_1.shape[0] == df.shape[0]
109 | # stage 2 test
110 | assert action_2.shape[0] == df.shape[0]
111 | # all stage test
112 | assert action_all.shape[0] == action_1.shape[0] * 2
113 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
114 | a2 = action_all.query("stage == 1")[["A1", "A2", "val"]].reset_index(drop=True)
115 | assert_frame_equal(action_1, a1)
116 | assert_frame_equal(action_2, a2)
117 | # fit bootstrap model
118 | dtr_model = IqLearnReg(
119 | n_stages=2,
120 | model_info=model_info,
121 | n_bs=2
122 | )
123 | dtr_model.fit(df)
124 | assert len(dtr_model.models[0]) == 2
125 |
126 |
127 | def test_iqlearn_sm_multinomial_action():
128 | # setup params
129 | n = 30
130 | thres = int(n / 2)
131 | # statsmodels
132 | model1 = "p_outcome ~ L1 * C(A1)"
133 | model2 = "p_outcome ~ L1 + A1 + Y1 * C(A2)"
134 | # sample dataframe
135 | df = pd.DataFrame()
136 | df["L1"] = np.arange(n)
137 | df["A1"] = ["A", "B", "C"] * int(n / 3)
138 | df["A2"] = ["A"] * int(n / 3) + ["C"] * int(n / 3) + ["D"] * int(n / 3)
139 | df["Y1"] = np.zeros(n)
140 | df["Y2"] = np.zeros(n)
141 | # set model info
142 | model_info = [
143 | {
144 | "model": model1,
145 | "action_dict": {"A1": ["A", "B", "C"]},
146 | "feature": ["L1", "A1"],
147 | "outcome": "Y1"
148 | },
149 | {
150 | "model": model2,
151 | "action_dict": {"A2": ["A", "C", "D"]},
152 | "feature": ["L1", "A1", "Y1", "A2"],
153 | "outcome": "Y2"
154 | }
155 | ]
156 | # fit model
157 | dtr_model = IqLearnReg(
158 | n_stages=2,
159 | model_info=model_info
160 | )
161 | dtr_model.fit(df)
162 | # predict optimal actions
163 | action_1 = dtr_model.predict(df, 0)
164 | action_2 = dtr_model.predict(df, 1)
165 | action_all = dtr_model.predict_all_stages(df)
166 | # stage 1 test
167 | assert action_1.shape[0] == df.shape[0]
168 | # stage 2 test
169 | assert action_2.shape[0] == df.shape[0]
170 | # all stage test
171 | assert action_all.shape[0] == action_1.shape[0] * 2
172 | a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
173 | a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
174 | assert_frame_equal(action_1, a1)
175 | assert_frame_equal(action_2, a2)
176 | # fit bootstrap model
177 | dtr_model = IqLearnReg(
178 | n_stages=2,
179 | model_info=model_info,
180 | n_bs=2
181 | )
182 | dtr_model.fit(df)
183 | assert len(dtr_model.models[0]) == 2
184 |
185 |
186 | def test_iqlearn_sm_get_params():
187 | # setup params
188 | n = 300
189 | thres = int(n / 2)
190 | # statsmodels
191 | model1 = "p_outcome ~ L1 * A1"
192 | model2 = "p_outcome ~ L1 + A1 + Y1 * A2"
193 | # sample dataframe
194 | df = pd.DataFrame()
195 | df["L1"] = np.random.normal(0, size=n)
196 | df["A1"] = [0, 1] * int(n / 2)
197 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
198 | df["Y1"] = df["L1"] * df["A1"] + np.random.normal(0, scale=5, size=n)
199 | df["Y2"] = df["A1"] + df["Y1"] * df["A2"] + np.random.normal(0, scale=5, size=n)
200 | # set model info
201 | model_info = [
202 | {
203 | "model": model1,
204 | "action_dict": {"A1": [0, 1]},
205 | "feature": ["L1", "A1"],
206 | "outcome": "Y1"
207 | },
208 | {
209 | "model": model2,
210 | "action_dict": {"A1": [0, 1], "A2": [0, 1]},
211 | "feature": ["L1", "A1", "Y1", "A2"],
212 | "outcome": "Y2"
213 | }
214 | ]
215 | # fit bootstrap model
216 | dtr_model = IqLearnReg(
217 | n_stages=2,
218 | model_info=model_info,
219 | n_bs=10
220 | )
221 | dtr_model.fit(df)
222 | # get params
223 | params = dtr_model.get_params()
224 | l1_unique_shape = params.query("stage == 0 & variable == 'L1'")["value"].unique().shape[0]
225 | a1_unique_shape = params.query("stage == 0 & variable == 'A1'")["value"].unique().shape[0]
226 | a2_unique_shape = params.query("stage == 1 & variable == 'A2'")["value"].unique().shape[0]
227 | assert l1_unique_shape != 1 or a1_unique_shape != 1 or a2_unique_shape != 1
228 | assert len(dtr_model.models[0]) == 10
229 |
--------------------------------------------------------------------------------