├── .circleci
    └── config.yml
├── .github
    └── CODEOWNERS
├── .gitignore
├── LICENSE
├── MANIFEST.IN
├── Makefile
├── README.md
├── examples
    └── .gitkeep
├── setup.cfg
├── setup.py
├── src
    └── pydtr
    │   ├── __init__.py
    │   ├── iqlearn
    │       ├── __init__.py
    │       ├── base.py
    │       └── regression.py
    │   └── version.py
└── tests
    ├── test_iqlearn_sklearn_predict.py
    └── test_iqlearn_sm_predict.py


/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | version: 2.1
 2 | orbs:
 3 |   codecov: codecov/codecov@1.0.2
 4 | jobs:
 5 |   build-and-test:
 6 |     docker:
 7 |       - image: circleci/python:3.6
 8 |         auth:
 9 |           username: fullflu
10 |           password: $DOCKERHUB_PASSWORD 
11 |     steps:
12 |       - checkout
13 |       - restore_cache:
14 |           key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }}
15 |       - run:
16 |           name: Setup testing environment
17 |           command: |
18 |             python3 -m venv venv
19 |             . venv/bin/activate
20 |             make dev
21 |       - save_cache:
22 |           key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }}
23 |           paths:
24 |             - "venv"
25 |       - run:
26 |           name: Run Tests
27 |           command: |
28 |             . venv/bin/activate
29 |             make test
30 |       - codecov/upload:
31 |           file: coverage.xml
32 |   deploy:
33 |     docker:
34 |       - image: circleci/python:3.6
35 |         auth:
36 |           username: fullflu
37 |           password: $DOCKERHUB_PASSWORD 
38 |     steps:
39 |       - checkout
40 |       - restore_cache:
41 |           key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }}
42 |       - run:
43 |           name: install python dependencies
44 |           command: |
45 |             python3 -m venv venv
46 |             . venv/bin/activate
47 |             make dev
48 |       - save_cache:
49 |           key: dependency-cache-{{ checksum "setup.py" }}-{{ checksum "Makefile" }}
50 |           paths:
51 |             - "venv"
52 |       - run:
53 |           name: verify git tag vs. version
54 |           command: |
55 |             python3 -m venv venv
56 |             . venv/bin/activate
57 |             python setup.py verify
58 |       - run:
59 |           name: init .pypirc
60 |           command: |
61 |             echo -e "[pypi]" >> ~/.pypirc
62 |             echo -e "username = fullflu" >> ~/.pypirc
63 |             echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc
64 |       - run:
65 |           name: create packages
66 |           command: |
67 |             make package
68 |       - run:
69 |           name: upload to pypi
70 |           command: |
71 |             . venv/bin/activate
72 |             twine upload --repository pypi dist/*
73 | workflows:
74 |   version: 2
75 |   workflow:
76 |     jobs:
77 |       - build-and-test:
78 |           context: docker-hub-creds
79 |           filters:
80 |             tags:
81 |               only: /.*/
82 |       - deploy:
83 |           context:
84 |             - docker-hub-creds
85 |             - pypi-publish
86 |           requires:
87 |             - build-and-test
88 |           filters:
89 |             branches:
90 |               ignore: /.*/
91 |             tags:
92 |               only: /[0-9]+(\.[0-9]+)*/
93 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @fullflu
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # cache
 2 | __pycache__
 3 | .mypy_cache
 4 | 
 5 | # Jupyter Notebook
 6 | .ipynb_checkpoints
 7 | 
 8 | # egg
 9 | *.egg-info/
10 | .eggs/
11 | 
12 | # python
13 | .python-version
14 | 
15 | # test
16 | .coverage
17 | htmlcov/
18 | 
19 | # env
20 | .env
21 | .envrc
22 | 
23 | # build
24 | build/
25 | dist/
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, fullflu
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/MANIFEST.IN:
--------------------------------------------------------------------------------
1 | include README.md
2 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | dev:
 2 | 	pip install --upgrade pip
 3 | 	pip install -e ./.
 4 | 	pip install category_encoders
 5 | 	pip install pytest
 6 | 	pip install coverage
 7 | 	pip install twine
 8 | 
 9 | package:
10 | 	python setup.py sdist
11 | 	python setup.py bdist_wheel
12 | 
13 | test:
14 | 	coverage run --source=src/pydtr -m pytest
15 | 	coverage report
16 | 	coverage xml
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pydtr
  2 | 
  3 | [![CircleCI](https://circleci.com/gh/fullflu/pydtr.svg?style=shield)](https://app.circleci.com/pipelines/github/fullflu/pydtr)
  4 | [![codecov](https://codecov.io/gh/fullflu/pydtr/branch/master/graph/badge.svg)](https://codecov.io/gh/fullflu/pydtr)
  5 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
  6 | 
  7 | 
  8 | ## Description
  9 | 
 10 | This is a python library to conduct a dynamic treatment regime ([DTR](https://en.wikipedia.org/wiki/Dynamic_treatment_regime)), `pydtr`.
 11 | 
 12 | A DTR is a paradigm that attempts to select optimal treatments adaptively for individual patients.
 13 | 
 14 | Pydtr enables you to implement DTR methods easily by using sklearn-based interfaces.
 15 | 
 16 | |                Method                 |  Single binary treatment   |  Multiple treatments  |    Multinomial treatment   |  Continuous treatment  |  Modeling flexibility  |  Interpretability  |
 17 | | ---- | ---- | ---- | ---- | ---- | ---- | ---- | 
 18 | |  IqLearnReg <br> (with sklearn)      |  :white_check_mark:   |   :white_check_mark:  |   :white_check_mark: <br>(with pipeline)  |    |   :white_check_mark: <br>(with arbitrary regression models)  |       |
 19 | |  IqLearnReg <br> (with statsmodels)  |  :white_check_mark:   |   :white_check_mark:  |   :white_check_mark:       |    | limited to OLS   |    :white_check_mark: <br>(with confidence intervals)  |
 20 | | GEstimation | WIP | | WIP | WIP | WIP | WIP |
 21 | 
 22 | `IqLearnReg` means a regression method of iterative q-learning.
 23 | 
 24 | When there are categorical independent variables and you use a sklearn model as a regression function, you need to encode the categorical variables before using the model.
 25 | 
 26 | We recommend encoding categorical variables with `category_encoders` and combining the encoders with the sklearn model via `sklearn.pipeline`.
 27 | 
 28 | G-estimation, a well-known DTR method, is not available yet (work in progress).
 29 | 
 30 | ## Requirements
 31 | 
 32 | - python>=3.6
 33 | - pandas>=1.1.2
 34 | - scikit-learn>=0.23.2
 35 | - numpy>=1.19.2
 36 | - statsmodels>=0.12.0
 37 | 
 38 | ## Installation
 39 | 
 40 | ### From pypi
 41 | 
 42 | ```
 43 | pip install pydtr
 44 | ```
 45 | 
 46 | ### From source
 47 | 
 48 | ```
 49 | git clone https://github.com/fullflu/pydtr.git
 50 | cd pydtr
 51 | python setup.py install
 52 | ```
 53 | 
 54 | ## Usage
 55 | 
 56 | ### Iterative Q Learning (IqLearnReg)
 57 | 
 58 | You need to import libraries and prepare data.
 59 | 
 60 | ```python
 61 | # import
 62 | import numpy as np
 63 | import pandas as pd
 64 | from sklearn.ensemble import RandomForestRegressor
 65 | 
 66 | from pydtr.iqlearn.regression import IqLearnReg
 67 | 
 68 | # create sample dataframe
 69 | n = 10
 70 | thres = int(n / 2)
 71 | df = pd.DataFrame()
 72 | df["L1"] = np.arange(n)
 73 | df["A1"] = [0, 1] * int(n / 2)
 74 | df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
 75 | df["Y1"] = np.zeros(n)
 76 | df["Y2"] = np.zeros(n)
 77 | ```
 78 | 
 79 | You can use sklearn-based models.
 80 | 
 81 | ```python
 82 | # set model info
 83 | model_info = [
 84 |     {
 85 |         "model": RandomForestRegressor(),
 86 |         "action_dict": {"A1": [0, 1]},
 87 |         "feature": ["L1", "A1"],
 88 |         "outcome": "Y1"
 89 |     },
 90 |     {
 91 |         "model": RandomForestRegressor(),
 92 |         "action_dict": {"A2": [0, 1]},
 93 |         "feature": ["L1", "A1", "Y1", "A2"],
 94 |         "outcome": "Y2"
 95 |     }
 96 | ]
 97 | # fit model
 98 | dtr_model = IqLearnReg(
 99 |     n_stages=2,
100 |     model_info=model_info
101 | )
102 | dtr_model.fit(df)
103 | 
104 | # predict optimal actions
105 | opt_action_stage_1 = dtr_model.predict(df, 0)
106 | opt_action_stage_2 = dtr_model.predict(df, 1)
107 | opt_action_all_stages = dtr_model.predict_all_stages(df)
108 | ```
109 | 
110 | You can also use statsmodels-based models.
111 | 
112 | ```python
113 | # set model info
114 | model_info = [
115 |     {
116 |         "model": "p_outcome ~ L1 * A1",
117 |         "action_dict": {"A1": [0, 1]},
118 |         "feature": ["L1", "A1"],
119 |         "outcome": "Y1"
120 |     },
121 |     {
122 |         "model": "p_outcome ~ L1 + A1 + Y1 * A2",
123 |         "action_dict": {"A2": [0, 1]},
124 |         "feature": ["L1", "A1", "Y1", "A2"],
125 |         "outcome": "Y2"
126 |     }
127 | ]
128 | # fit model
129 | dtr_model = IqLearnReg(
130 |     n_stages=2,
131 |     model_info=model_info
132 | )
133 | dtr_model.fit(df)
134 | 
135 | # predict optimal actions
136 | opt_action_stage_1 = dtr_model.predict(df, 0)
137 | opt_action_stage_2 = dtr_model.predict(df, 1)
138 | opt_action_all_stages = dtr_model.predict_all_stages(df)
139 | ```
140 | 
141 | Please see [examples](https://github.com/fullflu/pydtr/blob/master/examples/) to get more information.
142 | 
143 | ## Authors
144 | 
145 | - [@fullflu](https://github.com/fullflu) 
146 | 
147 | ## Contributors
148 | 
149 | Please feel free to create issues or to send pull-requests!
150 | 
151 | If all checks pass on a pull request, I will merge and release it.
152 | 
153 | ## License
154 | 
155 | [BSD](https://github.com/fullflu/pydtr/blob/master/LICENSE)
156 | 
157 | 
158 | ## Structure
159 | 
160 | ```
161 | ├── .circleci
162 | │   ├── config.yml
163 | ├── .github
164 | │   ├── CODEOWNERS
165 | ├── LICENSE
166 | ├── MANIFEST.IN
167 | ├── Makefile
168 | ├── README.md
169 | ├── examples
170 | │   ├── ...several notebooks...
171 | ├── setup.cfg
172 | ├── setup.py
173 | ├── src
174 | │   ├── pydtr
175 | │   │   ├── __init__.py
176 | │   │   └── iqlearn
177 | │   │       ├── __init__.py
178 | │   │       ├── base.py
179 | │   │       └── regression.py
180 | └── tests
181 |     ├── test_iqlearn_sklearn_predict.py
182 |     └── test_iqlearn_sm_predict.py
183 | ```
184 | 
185 | ## References
186 | 
187 | - Chakraborty, B, Moodie, EE. *Statistical Methods for Dynamic Treatment Regimes.* Springer, New York, 2013.
188 | 


--------------------------------------------------------------------------------
/examples/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fullflu/pydtr/8fbf836b2871ddfe4298a76c1257bfd06b0953ed/examples/.gitkeep


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | import os
 5 | import sys
 6 | 
 7 | from setuptools import setup
 8 | from setuptools import find_packages
 9 | from setuptools.command.install import install
10 | from codecs import open
11 | 
12 | from src.pydtr.version import __version__
13 | 
14 | here = os.path.abspath(os.path.dirname(__file__))
15 | 
16 | with open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
17 |     long_description = f.read()
18 | 
19 | 
class VerifyVersionCommand(install):
    """Setuptools command that compares the CI git tag with the package version.

    ``run`` reads the ``CIRCLE_TAG`` environment variable and terminates the
    process with an explanatory message when it differs from ``__version__``.
    """
    description = 'verify that the git tag matches our version'

    def run(self):
        """Exit with an error message unless ``CIRCLE_TAG`` equals ``__version__``."""
        circle_tag = os.environ.get('CIRCLE_TAG')
        if circle_tag == __version__:
            return
        # Passing a string to sys.exit reports it as the exit message.
        sys.exit(
            "Git tag: {0} does not match the version of this app: {1}".format(
                circle_tag, __version__
            )
        )
32 | 
33 | 
# Package metadata. The version comes from src/pydtr/version.py and the long
# description is the README read above.
setup(
    name='pydtr',
    version=__version__,
    description='Python library of Dynamic Treatment Regimes',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/fullflu/pydtr',
    author='fullflu',
    author_email='k.takayama0902@gmail.com',
    license='BSD',
    # README.md and the classifiers below name Python 3.6 as the minimum;
    # declaring it here lets pip refuse unsupported interpreters up front.
    python_requires='>=3.6',
    install_requires=[
        'pandas>=1.1.2',
        'scikit-learn>=0.23.2',
        'numpy>=1.19.2',
        'statsmodels>=0.12.0'
    ],
    keywords=['dynamic treatment regimes', 'reinforcement learning', 'dtr'],
    include_package_data=True,
    # Packages live under src/ (src layout).
    package_dir={'': "src"},
    packages=find_packages('src'),
    classifiers=[
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
    ],
    setup_requires=["pytest-runner"],
    tests_require=["pytest", "pytest-cov", "coverage", "category_encoders"],
    cmdclass={
        # `python setup.py verify` runs the tag/version check defined above.
        'verify': VerifyVersionCommand,
    }
)
66 | 


--------------------------------------------------------------------------------
/src/pydtr/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__
2 | 


--------------------------------------------------------------------------------
/src/pydtr/iqlearn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fullflu/pydtr/8fbf836b2871ddfe4298a76c1257bfd06b0953ed/src/pydtr/iqlearn/__init__.py


--------------------------------------------------------------------------------
/src/pydtr/iqlearn/base.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | from typing import Dict
  3 | from abc import ABCMeta, abstractmethod
  4 | 
  5 | 
  6 | import pandas as pd
  7 | import numpy as np
  8 | from sklearn.utils import resample
  9 | 
 10 | 
 11 | class IqLearnBase(object):
 12 |     """
 13 |     Base class of iterative q learning
 14 | 
 15 |     Attributes
 16 |     ----------
 17 |     n_stages : int
 18 |         the number of total stages
 19 |     model_info : list of dict
 20 |         list of model information dictionary of each stage
 21 |           - model: string or sklearn-based instance
 22 |               model object
 23 |           - action_dict: dict
 24 |               action varible -> domain of the variable
 25 |           - feature: str
 26 |               feature variable names
 27 |           - outcome: str
 28 |               a outcome variable name
 29 |     n_bs : int
 30 |         the number of bootstrap sampling (0 means no bootstrap)
 31 |     """
 32 |     def __init__(self, n_stages: int, model_info: list, n_bs: int = 0) -> None:
 33 |         """
 34 |         Parameters
 35 |         ----------
 36 |         n_stages : int
 37 |             The number of total stages
 38 |         model_info : list of dict
 39 |             List of model information dictionary of each stage
 40 |             - model: string or sklearn-based instance
 41 |                 Model object
 42 |             - action_dict: dict
 43 |                 Action varible -> domain of the variable
 44 |             - feature: str
 45 |                 Feature variable names
 46 |             - outcome: str
 47 |                 An outcome variable name
 48 |         n_bs : int
 49 |             The number of bootstrap sampling (0 means no bootstrap)
 50 |         """
 51 |         assert n_stages == len(model_info), "n_stages must be the same as the length of model_info"
 52 |         self.n_stages = n_stages
 53 |         self.model_info = model_info
 54 |         self.n_bs = n_bs
 55 |         self._init_set()
 56 | 
 57 |     def _get_max_val_df(self, model, X: pd.DataFrame, t: int) -> pd.DataFrame:
 58 |         """
 59 |         Get maximum value of Q function of stage t (ref: max_{A} Q(X, A))
 60 |         """
 61 |         action_dict = self.model_info[t]["action_dict"]
 62 |         keys = list(action_dict.keys())
 63 |         # dataframe to store pseudo outcomes
 64 |         max_val_df = pd.DataFrame(columns=keys)
 65 |         # initialize maximum value
 66 |         max_val_df["val"] = [-1 * np.inf] * X.shape[0]
 67 |         tmp_df = X.reset_index(drop=True).copy()
 68 |         for tmp_dict in [dict(zip(action_dict, vs)) for vs in itertools.product(*action_dict.values())]:
 69 |             # consider multiple action variables
 70 |             tmp_keys = list(tmp_dict.keys())
 71 |             tmp_values = list(tmp_dict.values())
 72 |             if len(tmp_values) == 1:
 73 |                 tmp_values = tmp_values[0]
 74 |             # raise ValueError
 75 |             # set action values
 76 |             tmp_df[tmp_keys] = tmp_values
 77 |             # predict q value for the action values
 78 |             val_a = model.predict(tmp_df)
 79 |             # update optimal actions and q values
 80 |             flag = max_val_df["val"] < val_a
 81 |             max_val_df.loc[flag.values, tmp_keys] = tmp_values
 82 |             max_val_df.loc[flag.values, "val"] = val_a[flag.values]
 83 |             # reset independent variables
 84 |             tmp_df = X.reset_index(drop=True).copy()
 85 |         return max_val_df
 86 | 
 87 |     def _get_p_outcome(self, model: object, X: pd.DataFrame, y: pd.Series, t: int) -> pd.Series:
 88 |         """
 89 |         Get pseudo-outcome of stage t
 90 |         """
 91 |         # return pseudo outcome
 92 |         max_val_df = self._get_max_val_df(model, X, t + 1)
 93 |         return max_val_df["val"].values + y.values
 94 | 
 95 |     @staticmethod
 96 |     def _sample_bs(df: pd.DataFrame, size_bs: int = -1) -> pd.DataFrame:
 97 |         """
 98 |         bootstrap sampling
 99 |         """
100 |         # bootstrap sampling
101 |         if size_bs == -1:
102 |             size_bs = df.shape[0]
103 |         return resample(df, n_samples=size_bs)
104 | 
105 |     def fit(self, df: pd.DataFrame):
106 |         """
107 |         Fit dtr models
108 | 
109 |         Parameters
110 |         ----------
111 |         df: pandas.dataframe
112 |             input data (each row contains all stage information of each individual)
113 | 
114 |         Returns
115 |         -------
116 |         self
117 |         """
118 |         # fit models using all data
119 |         for t in reversed(range(self.n_stages)):
120 |             print("Stage: {}".format(t))
121 |             X = df[self.model_info[t]["feature"]]
122 |             y = df[self.model_info[t]["outcome"]]
123 |             if t == self.n_stages - 1:
124 |                 p_outcome = y.values
125 |             else:
126 |                 X2 = df[self.model_info[t + 1]["feature"]]
127 |                 y2 = df[self.model_info[t + 1]["outcome"]]
128 |                 p_outcome = self._get_p_outcome(self.model_all[t + 1], X2, y2, t)
129 |             self._fit_model_all_data(X, p_outcome, t)
130 | 
131 |         # fit models using bootstrap
132 |         for i in range(self.n_bs):
133 |             df_i = self._sample_bs(df)
134 |             for t in reversed(range(self.n_stages)):
135 |                 # extract feature and outcome
136 |                 X = df_i[self.model_info[t]["feature"]]
137 |                 y = df_i[self.model_info[t]["outcome"]]
138 |                 if t == self.n_stages - 1:
139 |                     p_outcome = y.values
140 |                 else:
141 |                     X2 = df_i[self.model_info[t + 1]["feature"]]
142 |                     y2 = df_i[self.model_info[t + 1]["outcome"]]
143 |                     p_outcome = self._get_p_outcome(self.model_all[t + 1], X2, y2, t)
144 |                 # fit model of stage t
145 |                 self._fit_model(X, p_outcome, t, i)
146 |         return self
147 | 
148 |     @abstractmethod
149 |     def _fit_model(self, X: pd.DataFrame, p_outcome: np.array, t: int, i: int) -> None:
150 |         pass
151 | 
152 |     @abstractmethod
153 |     def _fit_model_all_data(self, X: pd.DataFrame, p_outcome: np.array, t: int) -> None:
154 |         pass
155 | 
156 |     def _init_set(self) -> None:
157 |         self.model_all = {}
158 |         self.models = {}
159 |         for t in range(self.n_stages):
160 |             # TODO: check model type
161 |             # self._check_model_type(self.model_info[t]["model"])
162 |             self.model_all[t] = None
163 |             if self.n_bs:
164 |                 self.models[t] = np.array([None] * (self.n_bs))
165 | 
166 |     def get_params(self) -> pd.DataFrame:
167 |         # get estimated parameters
168 |         params = pd.DataFrame()
169 |         for t in reversed(range(self.n_stages)):
170 |             if type(self.model_info[t]["model"]) == str:
171 |                 tmp_df = pd.melt(pd.DataFrame([i.params for i in self.models[t]]))
172 |                 tmp_df["stage"] = t
173 |                 params = pd.concat([params, tmp_df])
174 |         return params
175 | 
176 |     def predict(self, df: pd.DataFrame, t: int) -> pd.DataFrame:
177 |         """
178 |         Predict optimal treatment of stage t for each row
179 | 
180 |         Parameters
181 |         ----------
182 |         df: pandas.dataframe
183 |             input data (each row contains all stage information of each individual)
184 |         t: int
185 |             stage
186 | 
187 |         Returns
188 |         -------
189 |         max_val_df: pandas.dataframe
190 |             optimal treatments and their q-values
191 |         """
192 |         # return optimal actions for the specified stage
193 |         X = df[self.model_info[t]["feature"]].copy()
194 |         max_val_df = self._get_max_val_df(self.model_all[t], X, t)
195 |         return max_val_df
196 | 
197 |     def predict_all_stages(self, df: pd.DataFrame) -> pd.DataFrame:
198 |         """
199 |         Predict optimal treatment of all stages for each row
200 | 
201 |         Parameters
202 |         ----------
203 |         df: pandas.dataframe
204 |             input data (each row contains all stage information of each individual)
205 |         t: int
206 |             stage
207 | 
208 |         Returns
209 |         -------
210 |         max_df: pandas.dataframe
211 |             optimal treatments, their q-values, and stage information
212 |         """
213 |         # return optimal actions for all stages
214 |         max_df = pd.DataFrame()
215 |         for t in range(self.n_stages):
216 |             X = df[self.model_info[t]["feature"]]
217 |             max_val_df = self._get_max_val_df(self.model_all[t], X, t)
218 |             max_val_df["stage"] = t
219 |             max_val_df["row_id"] = np.arange(X.shape[0])
220 |             max_df = pd.concat([max_df, max_val_df], sort=True).reset_index(drop=True)
221 |         return max_df
222 | 


--------------------------------------------------------------------------------
/src/pydtr/iqlearn/regression.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import statsmodels.formula.api as smf
 4 | 
 5 | from .base import IqLearnBase
 6 | 
 7 | 
 8 | class IqLearnReg(IqLearnBase):
 9 |     def _fit_model(self, X: pd.DataFrame, p_outcome: np.array, t: int, i: int) -> None:
10 |         """
11 |         Fit dtr model of stage t and bootstrap i
12 | 
13 |         Parameters
14 |         ----------
15 |         X: pandas.dataframe
16 |             input data (each row contains feature of each individual)
17 |         p_outcome: np.array
18 |             pseudo outcome of stage t
19 |         t: int
20 |             stage
21 |         i: int
22 |             bootstrap index
23 | 
24 |         Returns
25 |         -------
26 |         None
27 |         """
28 |         if type(self.model_info[t]["model"]) == str:
29 |             df = X.copy()
30 |             df["p_outcome"] = p_outcome
31 |             self.models[t][i] = smf.ols(formula=self.model_info[t]["model"], data=df).fit()
32 |         else:
33 |             self.models[t][i] = self.model_info[t]["model"].fit(X, p_outcome)
34 | 
35 |     def _fit_model_all_data(self, X: pd.DataFrame, p_outcome: np.array, t: int) -> None:
36 |         """
37 |         Fit dtr model of stage t
38 | 
39 |         Parameters
40 |         ----------
41 |         X: pandas.dataframe
42 |             input data (each row contains feature of each individual)
43 |         p_outcome: np.array
44 |             pseudo outcome of stage t
45 |         t: int
46 |             stage
47 | 
48 |         Returns
49 |         -------
50 |         None
51 |         """
52 |         if type(self.model_info[t]["model"]) == str:
53 |             df = X.copy()
54 |             df["p_outcome"] = p_outcome
55 |             self.model_all[t] = smf.ols(formula=self.model_info[t]["model"], data=df).fit()
56 |         else:
57 |             self.model_all[t] = self.model_info[t]["model"].fit(X, p_outcome)
58 | 


--------------------------------------------------------------------------------
/src/pydtr/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.0.2"
2 | 


--------------------------------------------------------------------------------
/tests/test_iqlearn_sklearn_predict.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | import category_encoders as ce
  4 | from sklearn.base import BaseEstimator, RegressorMixin
  5 | from sklearn.ensemble import RandomForestRegressor
  6 | from pandas._testing import assert_frame_equal
  7 | from sklearn.linear_model import Ridge
  8 | from sklearn.preprocessing import StandardScaler
  9 | from sklearn.pipeline import Pipeline
 10 | 
 11 | from pydtr.iqlearn.regression import IqLearnReg
 12 | 
 13 | 
 14 | class RegWrapperRule1(BaseEstimator, RegressorMixin):
 15 |     def __init__(self, thres: int, fit_value: int) -> None:
 16 |         self.thres = thres
 17 |         self.fit_value = fit_value
 18 | 
 19 |     def fit(self, X: pd.DataFrame, y: np.array):
 20 |         return self
 21 | 
 22 |     def predict(self, X):
 23 |         flag = X["L1"] < self.thres
 24 |         q_val = pd.Series(np.zeros(X.shape[0]))
 25 |         # if L1 >= thres;  then q(L1, 1) > q(L1, 0); else q(L1, 1) < q(L1, 0)
 26 |         q_val[flag] = X.loc[flag, "A1"] * 2 + (1 - X.loc[flag, "A1"]) * 1
 27 |         q_val[~flag] = X.loc[~flag, "A1"] * 1 + (1 - X.loc[~flag, "A1"]) * 2
 28 |         return q_val
 29 | 
 30 | 
 31 | class RegWrapperRule2(BaseEstimator, RegressorMixin):
 32 |     def __init__(self, thres: int, fit_value: int) -> None:
 33 |         self.thres = thres
 34 |         self.fit_value = fit_value
 35 | 
 36 |     def fit(self, X: pd.DataFrame, y: np.array):
 37 |         return self
 38 | 
 39 |     def predict(self, X: pd.DataFrame) -> pd.Series:
 40 |         flag = X["L1"] < self.thres
 41 |         q_val = pd.Series(np.zeros(X.shape[0]))
 42 |         # if L1 >= thres;  then q(L1, 1) < q(L1, 0); else q(L1, 1) > q(L1, 0)
 43 |         q_val[flag] = X.loc[flag, "A2"] * 1 + (1 - X.loc[flag, "A2"]) * 2
 44 |         q_val[~flag] = X.loc[~flag, "A2"] * 2 + (1 - X.loc[~flag, "A2"]) * 1
 45 |         return q_val
 46 | 
 47 | 
 48 | def test_iqlearn_regwrapper_rule():
 49 |     # setup params
 50 |     n = 10
 51 |     thres = int(n / 2)
 52 |     # sample rule base models
 53 |     model1 = RegWrapperRule1(thres, 1)
 54 |     model2 = RegWrapperRule2(thres, 2)
 55 |     # sample dataframe
 56 |     df = pd.DataFrame()
 57 |     df["L1"] = np.arange(n)
 58 |     df["A1"] = [0, 1] * int(n / 2)
 59 |     df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
 60 |     df["Y1"] = np.zeros(n)
 61 |     df["Y2"] = np.zeros(n)
 62 |     # set model info
 63 |     model_info = [
 64 |         {
 65 |             "model": model1,
 66 |             "action_dict": {"A1": [0, 1]},
 67 |             "feature": ["L1", "A1"],
 68 |             "outcome": "Y1"
 69 |         },
 70 |         {
 71 |             "model": model2,
 72 |             "action_dict": {"A2": [0, 1]},
 73 |             "feature": ["L1", "A1", "Y1", "A2"],
 74 |             "outcome": "Y2"
 75 |         }
 76 |     ]
 77 |     # fit model (dummy)
 78 |     dtr_model = IqLearnReg(
 79 |         n_stages=2,
 80 |         model_info=model_info
 81 |     )
 82 |     dtr_model.fit(df)
 83 |     assert dtr_model.model_all[0].fit_value == 1
 84 |     assert dtr_model.model_all[1].fit_value == 2
 85 |     # predict optimal atcions
 86 |     action_1 = dtr_model.predict(df, 0)
 87 |     action_2 = dtr_model.predict(df, 1)
 88 |     action_all = dtr_model.predict_all_stages(df)
 89 |     # stage 1 test
 90 |     true_action_1 = [1] * int(n / 2) + [0] * int(n / 2)
 91 |     assert all([a == b for a, b in zip(action_1["A1"].tolist(), true_action_1)])
 92 |     # stage 2 test
 93 |     true_action_2 = [0] * int(n / 2) + [1] * int(n / 2)
 94 |     assert all([a == b for a, b in zip(action_2["A2"].tolist(), true_action_2)])
 95 |     # all stage test
 96 |     assert action_all.shape[0] == action_1.shape[0] * 2
 97 |     a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
 98 |     a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
 99 |     assert_frame_equal(action_1, a1)
100 |     assert_frame_equal(action_2, a2)
101 |     # function test
102 |     q_val_1 = dtr_model._get_max_val_df(dtr_model.model_all[0], df, 0)
103 |     assert all([a == b for a, b in zip(q_val_1["A1"].tolist(), true_action_1)])
104 |     q_val_2 = dtr_model._get_max_val_df(dtr_model.model_all[1], df, 1)
105 |     assert all([a == b for a, b in zip(q_val_2["A2"].tolist(), true_action_2)])
106 |     # fit bootstrap model (dummy)
107 |     dtr_model = IqLearnReg(
108 |         n_stages=2,
109 |         model_info=model_info,
110 |         n_bs=2
111 |     )
112 |     dtr_model.fit(df)
113 |     assert dtr_model.model_all[0].fit_value == 1
114 |     assert dtr_model.model_all[1].fit_value == 2
115 |     # check bootstrap model
116 |     assert dtr_model.models[0][0].fit_value == 1
117 |     assert dtr_model.models[1][0].fit_value == 2
118 |     assert len(dtr_model.models[0]) == 2
119 | 
120 | 
def test_iqlearn_rf():
    """Smoke-test IqLearnReg with a RandomForestRegressor at each stage.

    Asserts that per-stage predictions have one row per input row, that
    ``predict_all_stages`` agrees with the per-stage ``predict`` output, and
    that ``n_bs=2`` leaves two entries in ``models[0]``.
    """
    # setup params
    n = 10
    # rf models
    model1 = RandomForestRegressor()
    model2 = RandomForestRegressor()
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = np.arange(n)
    df["A1"] = [0, 1] * int(n / 2)
    df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
    df["Y1"] = np.zeros(n)
    df["Y2"] = np.zeros(n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": [0, 1]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A2": [0, 1]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info
    )
    dtr_model.fit(df)
    # predict optimal actions
    action_1 = dtr_model.predict(df, 0)
    action_2 = dtr_model.predict(df, 1)
    action_all = dtr_model.predict_all_stages(df)
    # stage 1 test
    assert action_1.shape[0] == df.shape[0]
    # stage 2 test
    assert action_2.shape[0] == df.shape[0]
    # all stage test: the stacked frame holds one row per sample per stage
    assert action_all.shape[0] == action_1.shape[0] * 2
    a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
    a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
    assert_frame_equal(action_1, a1)
    assert_frame_equal(action_2, a2)
    # fit bootstrap model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info,
        n_bs=2
    )
    dtr_model.fit(df)
    assert len(dtr_model.models[0]) == 2
178 | 
179 | 
def test_iqlearn_rf_multiple_actions():
    """Smoke-test IqLearnReg with random forests when stage 2 has two actions.

    The second stage's ``action_dict`` lists both ``A1`` and ``A2``, so its
    prediction frame is compared on the ``A1``, ``A2`` and ``val`` columns.
    Also checks that ``n_bs=2`` leaves two entries in ``models[0]``.
    """
    # setup params
    n = 10
    # rf models
    model1 = RandomForestRegressor()
    model2 = RandomForestRegressor()
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = np.arange(n)
    df["A1"] = [0, 1] * int(n / 2)
    df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
    df["Y1"] = np.zeros(n)
    df["Y2"] = np.zeros(n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": [0, 1]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A1": [0, 1], "A2": [0, 1]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info
    )
    dtr_model.fit(df)
    # predict optimal actions
    action_1 = dtr_model.predict(df, 0)
    action_2 = dtr_model.predict(df, 1)
    action_all = dtr_model.predict_all_stages(df)
    # stage 1 test
    assert action_1.shape[0] == df.shape[0]
    # stage 2 test
    assert action_2.shape[0] == df.shape[0]
    # all stage test: the stacked frame holds one row per sample per stage
    assert action_all.shape[0] == action_1.shape[0] * 2
    a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
    a2 = action_all.query("stage == 1")[["A1", "A2", "val"]].reset_index(drop=True)
    assert_frame_equal(action_1, a1)
    assert_frame_equal(action_2, a2)
    # fit bootstrap model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info,
        n_bs=2
    )
    dtr_model.fit(df)
    assert len(dtr_model.models[0]) == 2
237 | 
238 | 
def test_iqlearn_rf_ordinalencoder():
    """Smoke-test IqLearnReg with random forests on three-level integer actions.

    Stage 1 uses action codes 0/1/2 and stage 2 uses the non-contiguous codes
    0/1/3. Asserts row counts, agreement between ``predict_all_stages`` and
    per-stage ``predict``, and that ``n_bs=2`` leaves two entries in
    ``models[0]``.
    """
    # setup params
    n = 30
    # rf models
    model1 = RandomForestRegressor()
    model2 = RandomForestRegressor()
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = np.arange(n)
    df["A1"] = [0, 1, 2] * int(n / 3)
    df["A2"] = [0] * int(n / 3) + [1] * int(n / 3) + [3] * int(n / 3)
    df["Y1"] = np.zeros(n)
    df["Y2"] = np.zeros(n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": [0, 1, 2]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A2": [0, 1, 3]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info
    )
    dtr_model.fit(df)
    # predict optimal actions
    action_1 = dtr_model.predict(df, 0)
    action_2 = dtr_model.predict(df, 1)
    action_all = dtr_model.predict_all_stages(df)
    # stage 1 test
    assert action_1.shape[0] == df.shape[0]
    # stage 2 test
    assert action_2.shape[0] == df.shape[0]
    # all stage test: the stacked frame holds one row per sample per stage
    assert action_all.shape[0] == action_1.shape[0] * 2
    a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
    a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
    assert_frame_equal(action_1, a1)
    assert_frame_equal(action_2, a2)
    # fit bootstrap model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info,
        n_bs=2
    )
    dtr_model.fit(df)
    assert len(dtr_model.models[0]) == 2
296 | 
297 | 
def test_iqlearn_pipeline_category_encoder():
    """Smoke-test IqLearnReg with sklearn Pipelines on string-valued actions.

    Each stage model is a Pipeline of ``ce.OneHotEncoder`` on the action
    columns, ``StandardScaler`` and ``Ridge``. Asserts row counts and that
    ``predict_all_stages`` agrees with the per-stage ``predict`` output.
    """
    # setup params
    n = 30
    # sklearn pipelines: one-hot encode the action columns, scale, then ridge
    model1 = Pipeline(
        [
            ("ce0", ce.OneHotEncoder(cols=["A1"])),
            ("scale", StandardScaler()),
            ("model", Ridge())
        ]
    )
    model2 = Pipeline(
        [
            ("ce0", ce.OneHotEncoder(cols=["A1", "A2"])),
            ("scale", StandardScaler()),
            ("model", Ridge())
        ]
    )
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = np.arange(n)
    df["A1"] = ["A", "B", "C"] * int(n / 3)
    df["A2"] = ["A"] * int(n / 3) + ["C"] * int(n / 3) + ["D"] * int(n / 3)
    df["Y1"] = np.zeros(n)
    df["Y2"] = np.zeros(n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": ["A", "B", "C"]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A2": ["A", "C", "D"]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info
    )
    dtr_model.fit(df)
    # predict optimal actions
    action_1 = dtr_model.predict(df, 0)
    action_2 = dtr_model.predict(df, 1)
    action_all = dtr_model.predict_all_stages(df)
    # stage 1 test
    assert action_1.shape[0] == df.shape[0]
    # stage 2 test
    assert action_2.shape[0] == df.shape[0]
    # all stage test: the stacked frame holds one row per sample per stage
    assert action_all.shape[0] == action_1.shape[0] * 2
    a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
    a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
    assert_frame_equal(action_1, a1)
    assert_frame_equal(action_2, a2)
359 | 


--------------------------------------------------------------------------------
/tests/test_iqlearn_sm_predict.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | from sklearn.base import BaseEstimator, RegressorMixin
  4 | from pandas._testing import assert_frame_equal
  5 | 
  6 | from pydtr.iqlearn.regression import IqLearnReg
  7 | 
  8 | 
  9 | def test_iqlearn_sm():
 10 |     # setup params
 11 |     n = 10
 12 |     thres = int(n / 2)
 13 |     # statsmodels
 14 |     model1 = "p_outcome ~ L1 * A1"
 15 |     model2 = "p_outcome ~ L1 + A1 + Y1 * A2"
 16 |     # sample dataframe
 17 |     df = pd.DataFrame()
 18 |     df["L1"] = np.arange(n)
 19 |     df["A1"] = [0, 1] * int(n / 2)
 20 |     df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
 21 |     df["Y1"] = np.zeros(n)
 22 |     df["Y2"] = np.zeros(n)
 23 |     # set model info
 24 |     model_info = [
 25 |         {
 26 |             "model": model1,
 27 |             "action_dict": {"A1": [0, 1]},
 28 |             "feature": ["L1", "A1"],
 29 |             "outcome": "Y1"
 30 |         },
 31 |         {
 32 |             "model": model2,
 33 |             "action_dict": {"A2": [0, 1]},
 34 |             "feature": ["L1", "A1", "Y1", "A2"],
 35 |             "outcome": "Y2"
 36 |         }
 37 |     ]
 38 |     # fit model
 39 |     dtr_model = IqLearnReg(
 40 |         n_stages=2,
 41 |         model_info=model_info
 42 |     )
 43 |     dtr_model.fit(df)
 44 |     # predict optimal atcions
 45 |     action_1 = dtr_model.predict(df, 0)
 46 |     action_2 = dtr_model.predict(df, 1)
 47 |     action_all = dtr_model.predict_all_stages(df)
 48 |     # stage 1 test
 49 |     assert action_1.shape[0] == df.shape[0]
 50 |     # stage 2 test
 51 |     assert action_2.shape[0] == df.shape[0]
 52 |     # all stage test
 53 |     assert action_all.shape[0] == action_1.shape[0] * 2
 54 |     a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
 55 |     a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
 56 |     assert_frame_equal(action_1, a1)
 57 |     assert_frame_equal(action_2, a2)
 58 |     # fit bootstrap model
 59 |     dtr_model = IqLearnReg(
 60 |         n_stages=2,
 61 |         model_info=model_info,
 62 |         n_bs=2
 63 |     )
 64 |     dtr_model.fit(df)
 65 |     assert len(dtr_model.models[0]) == 2
 66 | 
 67 | 
 68 | def test_iqlearn_sm_multiple_actions():
 69 |     # setup params
 70 |     n = 10
 71 |     thres = int(n / 2)
 72 |     # statsmodels
 73 |     model1 = "p_outcome ~ L1 * A1"
 74 |     model2 = "p_outcome ~ L1 + A1 + Y1 * A2"
 75 |     # sample dataframe
 76 |     df = pd.DataFrame()
 77 |     df["L1"] = np.arange(n)
 78 |     df["A1"] = [0, 1] * int(n / 2)
 79 |     df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
 80 |     df["Y1"] = np.zeros(n)
 81 |     df["Y2"] = np.zeros(n)
 82 |     # set model info
 83 |     model_info = [
 84 |         {
 85 |             "model": model1,
 86 |             "action_dict": {"A1": [0, 1]},
 87 |             "feature": ["L1", "A1"],
 88 |             "outcome": "Y1"
 89 |         },
 90 |         {
 91 |             "model": model2,
 92 |             "action_dict": {"A1": [0, 1], "A2": [0, 1]},
 93 |             "feature": ["L1", "A1", "Y1", "A2"],
 94 |             "outcome": "Y2"
 95 |         }
 96 |     ]
 97 |     # fit model
 98 |     dtr_model = IqLearnReg(
 99 |         n_stages=2,
100 |         model_info=model_info
101 |     )
102 |     dtr_model.fit(df)
103 |     # predict optimal atcions
104 |     action_1 = dtr_model.predict(df, 0)
105 |     action_2 = dtr_model.predict(df, 1)
106 |     action_all = dtr_model.predict_all_stages(df)
107 |     # stage 1 test
108 |     assert action_1.shape[0] == df.shape[0]
109 |     # stage 2 test
110 |     assert action_2.shape[0] == df.shape[0]
111 |     # all stage test
112 |     assert action_all.shape[0] == action_1.shape[0] * 2
113 |     a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
114 |     a2 = action_all.query("stage == 1")[["A1", "A2", "val"]].reset_index(drop=True)
115 |     assert_frame_equal(action_1, a1)
116 |     assert_frame_equal(action_2, a2)
117 |     # fit bootstrap model
118 |     dtr_model = IqLearnReg(
119 |         n_stages=2,
120 |         model_info=model_info,
121 |         n_bs=2
122 |     )
123 |     dtr_model.fit(df)
124 |     assert len(dtr_model.models[0]) == 2
125 | 
126 | 
def test_iqlearn_sm_multinomial_action():
    """Smoke-test formula-string models with three-level string actions.

    The formulas wrap the stage's action column in ``C(...)`` and the action
    columns hold string labels. Asserts row counts, agreement between
    ``predict_all_stages`` and per-stage ``predict``, and that ``n_bs=2``
    leaves two entries in ``models[0]``.
    """
    # setup params
    n = 30
    # statsmodels
    model1 = "p_outcome ~ L1 * C(A1)"
    model2 = "p_outcome ~ L1 + A1 + Y1 * C(A2)"
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = np.arange(n)
    df["A1"] = ["A", "B", "C"] * int(n / 3)
    df["A2"] = ["A"] * int(n / 3) + ["C"] * int(n / 3) + ["D"] * int(n / 3)
    df["Y1"] = np.zeros(n)
    df["Y2"] = np.zeros(n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": ["A", "B", "C"]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A2": ["A", "C", "D"]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info
    )
    dtr_model.fit(df)
    # predict optimal actions
    action_1 = dtr_model.predict(df, 0)
    action_2 = dtr_model.predict(df, 1)
    action_all = dtr_model.predict_all_stages(df)
    # stage 1 test
    assert action_1.shape[0] == df.shape[0]
    # stage 2 test
    assert action_2.shape[0] == df.shape[0]
    # all stage test: the stacked frame holds one row per sample per stage
    assert action_all.shape[0] == action_1.shape[0] * 2
    a1 = action_all.query("stage == 0")[["A1", "val"]].reset_index(drop=True)
    a2 = action_all.query("stage == 1")[["A2", "val"]].reset_index(drop=True)
    assert_frame_equal(action_1, a1)
    assert_frame_equal(action_2, a2)
    # fit bootstrap model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info,
        n_bs=2
    )
    dtr_model.fit(df)
    assert len(dtr_model.models[0]) == 2
184 | 
185 | 
def test_iqlearn_sm_get_params():
    """Check that ``get_params`` reports coefficients that vary across fits.

    Fits formula-string models with ``n_bs=10`` on noisy simulated data, then
    reads the ``value`` column of ``get_params()`` for selected
    ``stage``/``variable`` pairs. The test passes when at least one of those
    coefficients takes more than one distinct value and ``models[0]`` holds
    ten entries.
    """
    # setup params
    n = 300
    # Fixed-seed generator so the simulated data is identical on every run.
    # Any randomness inside IqLearnReg.fit is not controlled from here.
    rng = np.random.RandomState(0)
    # statsmodels
    model1 = "p_outcome ~ L1 * A1"
    model2 = "p_outcome ~ L1 + A1 + Y1 * A2"
    # sample dataframe
    df = pd.DataFrame()
    df["L1"] = rng.normal(0, size=n)
    df["A1"] = [0, 1] * int(n / 2)
    df["A2"] = [0] * int(n / 2) + [1] * int(n / 2)
    df["Y1"] = df["L1"] * df["A1"] + rng.normal(0, scale=5, size=n)
    df["Y2"] = df["A1"] + df["Y1"] * df["A2"] + rng.normal(0, scale=5, size=n)
    # set model info
    model_info = [
        {
            "model": model1,
            "action_dict": {"A1": [0, 1]},
            "feature": ["L1", "A1"],
            "outcome": "Y1"
        },
        {
            "model": model2,
            "action_dict": {"A1": [0, 1], "A2": [0, 1]},
            "feature": ["L1", "A1", "Y1", "A2"],
            "outcome": "Y2"
        }
    ]
    # fit bootstrap model
    dtr_model = IqLearnReg(
        n_stages=2,
        model_info=model_info,
        n_bs=10
    )
    dtr_model.fit(df)
    # get params
    params = dtr_model.get_params()
    l1_unique_shape = params.query("stage == 0 & variable == 'L1'")["value"].unique().shape[0]
    a1_unique_shape = params.query("stage == 0 & variable == 'A1'")["value"].unique().shape[0]
    a2_unique_shape = params.query("stage == 1 & variable == 'A2'")["value"].unique().shape[0]
    # at least one coefficient must differ between the fitted models
    assert l1_unique_shape != 1 or a1_unique_shape != 1 or a2_unique_shape != 1
    assert len(dtr_model.models[0]) == 10
229 | 


--------------------------------------------------------------------------------