├── .github └── workflows │ ├── lints.yml │ └── tests.yml ├── .gitignore ├── .readthedocs.yml ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── README_JN.md ├── benchmark ├── README.md ├── cf_policy_search │ ├── README.md │ ├── conf │ │ └── hyperparams.yaml │ ├── custom_dataset.py │ └── run_cf_policy_search.py └── ope │ ├── README.md │ ├── benchmark_ope_estimators.py │ ├── benchmark_ope_estimators_hypara.py │ ├── conf │ ├── config.yaml │ ├── estimator_hyperparams │ │ └── default.yaml │ ├── reg_model_hyperparams │ │ └── default.yaml │ └── setting │ │ └── default.yaml │ ├── poetry.lock │ └── pyproject.toml ├── docs ├── Makefile ├── _static │ └── images │ │ ├── dataset.png │ │ ├── evaluation_of_ope_algo.png │ │ ├── logo.png │ │ ├── overview.png │ │ ├── recommended_fashion_items.png │ │ ├── related_data.png │ │ ├── related_packages.png │ │ └── statistics_of_obd.png ├── about.rst ├── conf.py ├── estimators.rst ├── evaluation_ope.rst ├── index.rst ├── installation.rst ├── make.bat ├── obp.rst ├── ope.rst ├── quickstart.rst ├── references.rst ├── refs.bib ├── related.rst └── requirements.txt ├── examples ├── README.md ├── multiclass │ ├── README.md │ ├── conf │ │ └── hyperparams.yaml │ └── evaluate_off_policy_estimators.py ├── obd │ ├── README.md │ ├── conf │ │ └── hyperparams.yaml │ └── evaluate_off_policy_estimators.py ├── opl │ ├── README.md │ ├── conf │ │ └── hyperparams.yaml │ └── evaluate_off_policy_learners.py ├── quickstart │ ├── README.md │ ├── multiclass.ipynb │ ├── obd.ipynb │ ├── online-bandit-vs-opl-simulation.ipynb │ ├── online-bandit-with-delay-simulation.ipynb │ ├── online-bandit-with-drift-simulation.ipynb │ ├── opl.ipynb │ ├── replay.ipynb │ ├── synthetic.ipynb │ └── synthetic_slate.ipynb ├── replay │ ├── README.md │ └── evaluate_off_policy_estimators.py └── synthetic │ ├── README.md │ ├── conf │ └── hyperparams.yaml │ └── evaluate_off_policy_estimators.py ├── images ├── dataset.png ├── logo.png ├── obd_stats.png ├── ope_results_example.png ├── overview.png └── recommended_fashion_items.png ├── obd ├── README.md ├── README_JN.md ├── bts │ ├── all │ │ ├── all.csv │ │ └── item_context.csv │ ├── men │ │ ├── item_context.csv │ │ └── men.csv │ └── women │ │ ├── item_context.csv │ │ └── women.csv └── random │ ├── all │ ├── all.csv │ └── item_context.csv │ ├── men │ ├── item_context.csv │ └── men.csv │ └── women │ ├── item_context.csv │ └── women.csv ├── obp ├── __init__.py ├── dataset │ ├── __init__.py │ ├── base.py │ ├── multiclass.py │ ├── obd │ │ ├── bts │ │ │ ├── all │ │ │ │ ├── all.csv │ │ │ │ └── item_context.csv │ │ │ ├── men │ │ │ │ ├── item_context.csv │ │ │ │ └── men.csv │ │ │ └── women │ │ │ │ ├── item_context.csv │ │ │ │ └── women.csv │ │ └── random │ │ │ ├── all │ │ │ ├── all.csv │ │ │ └── item_context.csv │ │ │ ├── men │ │ │ ├── item_context.csv │ │ │ └── men.csv │ │ │ └── women │ │ │ ├── item_context.csv │ │ │ └── women.csv │ ├── real.py │ ├── reward_type.py │ ├── synthetic.py │ ├── synthetic_continuous.py │ ├── synthetic_embed.py │ ├── synthetic_multi.py │ └── synthetic_slate.py ├── ope │ ├── __init__.py │ ├── classification_model.py │ ├── estimators.py │ ├── estimators_continuous.py │ ├── estimators_embed.py │ ├── estimators_multi.py │ ├── estimators_slate.py │ ├── estimators_tuning.py │ ├── helper.py │ ├── meta.py │ ├── meta_continuous.py │ ├── meta_multi.py │ ├── meta_slate.py │ ├── regression_model.py │ └── regression_model_slate.py ├── policy │ ├── __init__.py │ ├── base.py │ ├── conf │ │ └── prior_bts.yaml │ ├── contextfree.py │ ├── linear.py │ 
├── logistic.py │ ├── offline.py │ ├── offline_continuous.py │ └── policy_type.py ├── simulator │ ├── __init__.py │ ├── coefficient_drifter.py │ ├── delay_sampler.py │ ├── replay.py │ └── simulator.py ├── types.py ├── utils.py └── version.py ├── poetry.lock ├── pyproject.toml ├── setup.cfg ├── setup.py ├── slides ├── slides_EN.pdf └── slides_JN.pdf └── tests ├── dataset ├── test_multiclass.py ├── test_real.py ├── test_synthetic.py ├── test_synthetic_continuous.py ├── test_synthetic_embed.py ├── test_synthetic_multi.py ├── test_synthetic_slate.py └── test_synthetic_slate_functions.py ├── ope ├── conftest.py ├── hyperparams.yaml ├── hyperparams_slate.yaml ├── test_all_estimators.py ├── test_bipw_estimators.py ├── test_dm_estimators.py ├── test_dr_estimators.py ├── test_dr_estimators_continuous.py ├── test_dr_estimators_multi.py ├── test_dr_estimators_slate.py ├── test_importance_weight_estimator.py ├── test_ipw_estimators.py ├── test_ipw_estimators_continuous.py ├── test_ipw_estimators_embed.py ├── test_ipw_estimators_multi.py ├── test_ipw_estimators_slate.py ├── test_kernel_functions.py ├── test_meta.py ├── test_meta_continuous.py ├── test_meta_multi.py ├── test_meta_slate.py ├── test_offline_estimation_performance.py ├── test_propensity_score_estimator.py ├── test_regression_models.py └── test_regression_models_slate.py ├── policy ├── test_contextfree.py ├── test_linear.py ├── test_logistic.py ├── test_offline.py ├── test_offline_continuous.py ├── test_offline_learner_continuous_performance.py └── test_offline_learner_performance.py ├── simulator ├── test_coefficient_drifter.py ├── test_delay_sampler.py └── test_simulator.py └── test_utils.py /.github/workflows/lints.yml: -------------------------------------------------------------------------------- 1 | name: Lints 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: {} 8 | 9 | jobs: 10 | lints: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v2 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Black 23 | uses: psf/black@stable 24 | with: 25 | args: ". --check --diff" 26 | 27 | - name: flake8 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install flake8 31 | flake8 . 32 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: {} 8 | 9 | jobs: 10 | tests: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | python-version: [3.7, 3.8, 3.9] 16 | 17 | # Not intended for forks. 18 | if: github.repository == 'st-tech/zr-obp' 19 | 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v2 23 | 24 | - name: Setup Python${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | 29 | - name: Install 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install --progress-bar off -U setuptools 33 | 34 | # Install pytest 35 | pip install --progress-bar off . 
36 | 37 | pip install --progress-bar off pytest 38 | 39 | - name: Tests 40 | run: | 41 | pytest tests 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # additionals 132 | _autosummary/ 133 | __pycache__/ 134 | build/ 135 | dist/ 136 | *.egg-info/ 137 | .vscode/ 138 | .DS_Store 139 | logs/ 140 | open_bandit_dataset/ 141 | note.ipynb 142 | *.npy 143 | *.html 144 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Required 2 | version: 2 3 | 4 | # Build documentation in the docs/ directory with Sphinx 5 | sphinx: 6 | configuration: docs/conf.py 7 | 8 | # Optionally build your docs in additional formats such as PDF and ePub 9 | formats: all 10 | 11 | # Optionally set the version of Python and requirements required to build your docs 12 | python: 13 | version: 3.7 14 | install: 15 | - requirements: requirements.txt 16 | - requirements: docs/requirements.txt 17 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contribution Guidelines 2 | First off, thanks for your interest! 3 | 4 | We are doing our best to make this project even better. However, we recognize that there is ample room for improvement. 5 | We need your help to make this project even better. 6 | Let's make the best Off-Policy Evaluation software together! 7 | 8 | We prepare some conventions as follows: 9 | 10 | - [Coding Guidelines](#coding-guidelines) 11 | - [Tests](#tests) 12 | - [Continuous Integration](#continuous-integration) 13 | 14 | ## Coding Guidelines 15 | 16 | Code is formatted with [black](https://github.com/psf/black), 17 | and coding style is checked with [flake8](http://flake8.pycqa.org). 18 | 19 | After installing black, you can perform code formatting by the following command: 20 | 21 | ```bash 22 | # perform formatting recursively for the files under the current dir 23 | $ black . 24 | ``` 25 | 26 | After installing flake8, you can check the coding style by the following command: 27 | 28 | ```bash 29 | # perform checking of the coding style 30 | $ flake8 . 31 | ``` 32 | 33 | ## Tests 34 | 35 | We employ pytest as the testing framework. You can run all the tests as follows: 36 | 37 | ```bash 38 | # perform all the tests under the tests directory 39 | $ pytest . 40 | ``` 41 | 42 | ## Continuous Integration 43 | 44 | Open Bandit Pipeline uses Github Actions to perform continuous integration. 
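The CI workflows (`.github/workflows/lints.yml` and `.github/workflows/tests.yml`) run essentially the same checks described above. As a rough local equivalent (a sketch; the exact Python versions are pinned in the workflow files), you can reproduce them before opening a pull request:

```bash
# rough local equivalent of the CI checks in lints.yml and tests.yml
python -m pip install --upgrade pip
pip install black flake8 pytest
pip install .             # install obp itself
black . --check --diff    # formatting check
flake8 .                  # coding style check
pytest tests              # unit tests
```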
45 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.md 3 | recursive-include obp/policy/conf * 4 | recursive-include obp/dataset/obd * 5 | include LICENSE 6 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark Experiments 2 | --- 3 | This directory includes some benchmark experiments and demonstrations about off-policy evaluation using [the full size Open Bandit Dataset](https://research.zozo.com/data.html). The detailed description, results, and discussions can be found in [the relevant paper](https://arxiv.org/abs/2008.07146). 4 | 5 | - [`cf_policy_search`](./cf_policy_search): counterfactual policy search using OPE 6 | - [`ope`](./ope): estimation performance comparisons on a variety of OPE estimators 7 | -------------------------------------------------------------------------------- /benchmark/cf_policy_search/README.md: -------------------------------------------------------------------------------- 1 | # Counterfactual Policy Search 2 | 3 | ## Description 4 | 5 | ## Running Counterfactual Policy Search 6 | 7 | ``` 8 | for model in lightgbm 9 | do 10 | for context in 1 11 | do 12 | for camp in men 13 | do 14 | screen python run_cf_policy_search.py\ 15 | --context_set $context\ 16 | --base_model $model\ 17 | --behavior_policy bts\ 18 | --campaign $camp 19 | done 20 | done 21 | done 22 | ``` 23 | 24 | ``` 25 | python run_cf_policy_search.py --context_set 1 --base_model logistic_regression --campaign men --n_boot_samples 2 --test_size 0.9 26 | ``` 27 | 28 | ## Results 29 | -------------------------------------------------------------------------------- /benchmark/cf_policy_search/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | max_iter: 500 3 | learning_rate: 0.005 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 1000 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 500 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /benchmark/cf_policy_search/custom_dataset.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.decomposition import PCA 6 | 7 | from obp.dataset import OpenBanditDataset 8 | 9 | 10 | @dataclass 11 | class OBDWithInteractionFeatures(OpenBanditDataset): 12 | context_set: str = "1" 13 | 14 | def pre_process(self) -> None: 15 | 16 | if self.context_set == "1": 17 | super().pre_process() 18 | elif self.context_set == "2": 19 | self._pre_process_context_set_2() 20 | 21 | def _pre_process_context_set_1(self) -> None: 22 | """Create Context Set 1 (c.f., Section 5.2)""" 23 | 24 | user_cols = self.data.columns.str.contains("user_feature") 25 | self.context = pd.get_dummies( 26 | self.data.loc[:, user_cols], drop_first=True 27 | ).values 28 | 29 | def _pre_process_context_set_2(self) -> None: 30 | """Create Context Set 2 (c.f., Section 5.2)""" 31 | 32 | super().pre_process() 33 | affinity_cols = self.data.columns.str.contains("affinity") 34 | Xaffinity = self.data.loc[:, 
affinity_cols].values 35 | self.context = PCA(n_components=30).fit_transform( 36 | np.c_[self.context, Xaffinity] 37 | ) 38 | -------------------------------------------------------------------------------- /benchmark/cf_policy_search/run_cf_policy_search.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | from custom_dataset import OBDWithInteractionFeatures 5 | from joblib import delayed 6 | from joblib import Parallel 7 | import numpy as np 8 | from pandas import DataFrame 9 | from sklearn.ensemble import GradientBoostingClassifier 10 | from sklearn.ensemble import RandomForestClassifier 11 | from sklearn.linear_model import LogisticRegression 12 | import yaml 13 | 14 | from obp.ope import InverseProbabilityWeighting 15 | from obp.policy import IPWLearner 16 | 17 | 18 | # hyperparameters of the regression model used in model dependent OPE estimators 19 | with open("./conf/hyperparams.yaml", "rb") as f: 20 | hyperparams = yaml.safe_load(f) 21 | 22 | base_model_dict = dict( 23 | logistic_regression=LogisticRegression, 24 | lightgbm=GradientBoostingClassifier, 25 | random_forest=RandomForestClassifier, 26 | ) 27 | 28 | if __name__ == "__main__": 29 | parser = argparse.ArgumentParser(description="run evaluation policy selection.") 30 | parser.add_argument( 31 | "--n_runs", 32 | type=int, 33 | default=5, 34 | help="number of bootstrap sampling in the experiment.", 35 | ) 36 | parser.add_argument( 37 | "--context_set", 38 | type=str, 39 | choices=["1", "2"], 40 | required=True, 41 | help="context sets for contextual bandit policies.", 42 | ) 43 | parser.add_argument( 44 | "--base_model", 45 | type=str, 46 | choices=["logistic_regression", "lightgbm", "random_forest"], 47 | required=True, 48 | help="base model for a evaluation policy to be evaluated", 49 | ) 50 | parser.add_argument( 51 | "--behavior_policy", 52 | type=str, 53 | choices=["bts", "random"], 54 | default="random", 55 | help="behavior policy, bts or random.", 56 | ) 57 | parser.add_argument( 58 | "--campaign", 59 | type=str, 60 | choices=["all", "men", "women"], 61 | required=True, 62 | help="campaign name, men, women, or all.", 63 | ) 64 | parser.add_argument( 65 | "--test_size", 66 | type=float, 67 | default=0.5, 68 | help="the proportion of the dataset to include in the test split.", 69 | ) 70 | parser.add_argument( 71 | "--n_jobs", 72 | type=int, 73 | default=1, 74 | help="the maximum number of concurrently running jobs.", 75 | ) 76 | parser.add_argument("--random_state", type=int, default=12345) 77 | args = parser.parse_args() 78 | print(args) 79 | 80 | # configurations 81 | n_runs = args.n_runs 82 | context_set = args.context_set 83 | base_model = args.base_model 84 | behavior_policy = args.behavior_policy 85 | campaign = args.campaign 86 | test_size = args.test_size 87 | n_jobs = args.n_jobs 88 | random_state = args.random_state 89 | np.random.seed(random_state) 90 | data_path = Path("../open_bandit_dataset") 91 | 92 | # define a dataset class 93 | obd = OBDWithInteractionFeatures( 94 | behavior_policy=behavior_policy, 95 | campaign=campaign, 96 | data_path=data_path, 97 | context_set=context_set, 98 | ) 99 | # define a counterfactual policy based on IPWLearner 100 | counterfactual_policy = IPWLearner( 101 | base_model=base_model_dict[base_model](**hyperparams[base_model]), 102 | n_actions=obd.n_actions, 103 | len_list=obd.len_list, 104 | ) 105 | policy_name = f"{base_model}_{context_set}" 106 | 107 | # ground-truth policy value of the 
Bernoulli TS policy (the current best policy) in the test set 108 | # , which is the empirical mean of the factual (observed) rewards (on-policy estimation) 109 | ground_truth = obd.calc_on_policy_policy_value_estimate( 110 | behavior_policy="bts", 111 | campaign=campaign, 112 | data_path=data_path, 113 | test_size=test_size, 114 | is_timeseries_split=True, 115 | ) 116 | 117 | def process(b: int): 118 | # sample bootstrap from batch logged bandit feedback 119 | boot_bandit_feedback = obd.sample_bootstrap_bandit_feedback( 120 | test_size=test_size, is_timeseries_split=True, random_state=b 121 | ) 122 | # train an evaluation on the training set of the logged bandit feedback data 123 | action_dist = counterfactual_policy.fit( 124 | context=boot_bandit_feedback["context"], 125 | action=boot_bandit_feedback["action"], 126 | reward=boot_bandit_feedback["reward"], 127 | pscore=boot_bandit_feedback["pscore"], 128 | position=boot_bandit_feedback["position"], 129 | ) 130 | # make action selections (predictions) 131 | action_dist = counterfactual_policy.predict( 132 | context=boot_bandit_feedback["context_test"] 133 | ) 134 | # estimate the policy value of a given counterfactual algorithm by the three OPE estimators. 135 | ipw = InverseProbabilityWeighting() 136 | return ipw.estimate_policy_value( 137 | reward=boot_bandit_feedback["reward_test"], 138 | action=boot_bandit_feedback["action_test"], 139 | position=boot_bandit_feedback["position_test"], 140 | pscore=boot_bandit_feedback["pscore_test"], 141 | action_dist=action_dist, 142 | ) 143 | 144 | processed = Parallel( 145 | backend="multiprocessing", 146 | n_jobs=n_jobs, 147 | verbose=50, 148 | )([delayed(process)(i) for i in np.arange(n_runs)]) 149 | 150 | # save counterfactual policy evaluation results in `./logs` directory 151 | ope_results = np.zeros((n_runs, 2)) 152 | for b, estimated_policy_value_b in enumerate(processed): 153 | ope_results[b, 0] = estimated_policy_value_b 154 | ope_results[b, 1] = estimated_policy_value_b / ground_truth 155 | save_path = Path("./logs") / behavior_policy / campaign 156 | save_path.mkdir(exist_ok=True, parents=True) 157 | DataFrame( 158 | ope_results, columns=["policy_value", "relative_policy_value"] 159 | ).describe().round(6).to_csv(save_path / f"{policy_name}.csv") 160 | -------------------------------------------------------------------------------- /benchmark/ope/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking Off-Policy Evaluation 2 | 3 | This directory includes the code to replicate the benchmark experiment done in the following paper. 4 | 5 | Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita.
6 | **Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation**
7 | [https://arxiv.org/abs/2008.07146](https://arxiv.org/abs/2008.07146)
8 |
9 |
10 | If you find this code useful in your research, please cite:
11 | ```
12 | @article{saito2020open,
13 | title={Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation},
14 | author={Saito, Yuta and Shunsuke, Aihara and Megumi, Matsutani and Yusuke, Narita},
15 | journal={arXiv preprint arXiv:2008.07146},
16 | year={2020}
17 | }
18 | ```
19 |
20 | ## Description
21 | We use the (full size) Open Bandit Dataset to evaluate and compare OPE estimators in a *realistic* and *reproducible* manner. Specifically, we evaluate the estimation performance of a wide variety of OPE estimators by comparing the policy values estimated by OPE with the on-policy policy value of an evaluation policy.
22 |
23 | ### Dataset
24 | Please download the full [open bandit dataset](https://research.zozo.com/data.html) and put it in the `../open_bandit_dataset/` directory.
25 |
26 | ## Evaluating Off-Policy Estimators
27 |
28 | In the benchmark experiment, we evaluate the estimation performance of the following OPE estimators.
29 |
30 | - Direct Method (DM)
31 | - Inverse Probability Weighting (IPW)
32 | - Self-Normalized Inverse Probability Weighting (SNIPW)
33 | - Doubly Robust (DR)
34 | - Self-Normalized Doubly Robust (SNDR)
35 | - Switch Doubly Robust (Switch-DR)
36 | - Doubly Robust with Optimistic Shrinkage (DRos)
37 |
38 | See Section 2 and Appendix B of [our paper](https://arxiv.org/abs/2008.07146) or the package [documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details of these estimators.
39 |
40 | For Switch-DR and DRos, we use a data-driven hyperparameter tuning method described in [Su et al.](https://arxiv.org/abs/1907.09623).
41 | For estimators except for DM, we use the true action choice probabilities contained in the Open Bandit Dataset.
42 | For estimators except for IPW and SNIPW, we need to obtain a reward estimator.
43 | We do this by using machine learning models (such as gradient boosting) implemented in *scikit-learn*.
44 | We also use cross-fitting to avoid substantial bias from overfitting when obtaining a reward estimator.
45 |
46 | ## Requirements and Setup
47 |
48 | The Python environment is built using [poetry](https://github.com/python-poetry/poetry). You can build the same environment as in our benchmark experiment by cloning the repository and running `poetry install` directly under this folder (if you have not installed poetry yet, please run `pip install poetry` first).
49 |
50 | ```bash
51 | # clone the obp repository
52 | git clone https://github.com/st-tech/zr-obp.git
53 | cd zr-obp/benchmark/ope
54 |
55 | # build the environment with poetry
56 | poetry install
57 |
58 | # run the benchmark experiment
59 | poetry run python benchmark_ope_estimators.py ...
60 | ```
61 |
62 | The versions of Python and the packages used are as follows.
63 |
64 | ```
65 | [tool.poetry.dependencies]
66 | python = "^3.9,<3.10"
67 | scikit-learn = "^0.24.2"
68 | pandas = "^1.3.1"
69 | numpy = "^1.21.1"
70 | matplotlib = "^3.4.2"
71 | hydra-core = "^1.0.7"
72 | pingouin = "^0.4.0"
73 | pyieoe = "^0.1.0"
74 | obp = "^0.5.0"
75 | ```
76 |
77 | ## Files
78 |
79 | - [benchmark_ope_estimators.py](https://github.com/st-tech/zr-obp/blob/master/benchmark/ope/benchmark_ope_estimators.py) implements the experimental workflow to evaluate and compare the above OPE estimators using Open Bandit Dataset.
We summarize the detailed experimental protocol for evaluating OPE estimators using real-world data [here](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html).
80 | - [benchmark_ope_estimators_hypara.py](https://github.com/st-tech/zr-obp/blob/master/benchmark/ope/benchmark_ope_estimators_hypara.py) evaluates the effect of the hyperparameter choice on the OPE performance of DRos.
81 | - [./conf/](./conf/) specifies experimental settings such as the number of random seeds.
82 |
83 | ## Scripts
84 | The experimental workflow is implemented using [Hydra](https://github.com/facebookresearch/hydra). Below, we explain the important experimental configurations.
85 |
86 | ```bash
87 | # run evaluation of OPE experiments on the full open bandit dataset
88 | poetry run python benchmark_ope_estimators.py\
89 |   setting.n_seeds=$n_seeds\
90 |   setting.campaign=$campaign\
91 |   setting.behavior_policy=$behavior_policy\
92 |   setting.sample_size=$sample_size\
93 |   setting.reg_model=$reg_model\
94 |   setting.is_timeseries_split=$is_time_series_split
95 | ```
96 |
97 | - `$n_seeds` specifies the number of random seeds used in the experiment.
98 | - `$campaign` specifies the campaign considered in ZOZOTOWN and should be one of "all", "men", or "women".
99 | - `$behavior_policy` specifies which policy, Random or Bernoulli TS (bts), is used as the behavior policy and should be either "random" or "bts".
100 | - `$sample_size` specifies the number of samples contained in the logged bandit feedback used to conduct OPE.
101 | - `$reg_model` specifies the base ML model for defining the regression model and should be one of "logistic_regression", "random_forest", or "lightgbm".
102 | - `$is_timeseries_split` specifies whether the data is split by timestamp. If true, the out-sample performance of OPE is evaluated. See the relevant paper for details.
103 |
104 | Please see [`./conf/setting/default.yaml`](./conf/setting/default.yaml) for the default experimental configurations, which are used when they are not overridden.
105 |
106 | It is possible to run multiple experimental settings easily by using the `--multirun (-m)` option of Hydra.
107 | For example, the following script sweeps over all combinations of the three campaigns ('all', 'men', and 'women') and the two behavior policies ('random' and 'bts').
108 |
109 | ```bash
110 | poetry run python benchmark_ope_estimators.py setting.campaign=all,men,women setting.behavior_policy=random,bts --multirun
111 | ```
112 |
113 | The experimental results (including the pairwise hypothesis test results) will be stored in the `logs/` directory.
114 | Our benchmark results and findings can be found in Section 5 of [our paper](https://arxiv.org/abs/2008.07146).
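As a further illustration of the Hydra overrides described above, an out-sample evaluation run would look like the following sketch (the field names follow [`./conf/setting/default.yaml`](./conf/setting/default.yaml); the specific values are only for illustration):

```bash
# evaluate the out-sample performance of OPE on the "all" campaign
poetry run python benchmark_ope_estimators.py\
  setting.campaign=all\
  setting.behavior_policy=bts\
  setting.is_timeseries_split=True\
  setting.test_size=0.3
```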
115 | -------------------------------------------------------------------------------- /benchmark/ope/conf/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - estimator_hyperparams: default 3 | - reg_model_hyperparams: default 4 | - setting: default 5 | hydra: 6 | run: 7 | dir: ./logs/is_timeseries_split=${setting.is_timeseries_split}/behavior_policy=${setting.behavior_policy}/campaign=${setting.campaign}/reg_model=${setting.reg_model}/sample_size=${setting.sample_size} 8 | sweep: 9 | dir: ./logs/is_timeseries_split=${setting.is_timeseries_split}/behavior_policy=${setting.behavior_policy}/campaign=${setting.campaign}/reg_model=${setting.reg_model}/sample_size=${setting.sample_size} 10 | subdir: ./ 11 | -------------------------------------------------------------------------------- /benchmark/ope/conf/estimator_hyperparams/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | # a set of candidate hyperparams 3 | lambdas: 4 | - 1 5 | - 5 6 | - 10 7 | - 50 8 | - 100 9 | - 500 10 | - 1000 11 | - 5000 12 | - 10000 13 | -------------------------------------------------------------------------------- /benchmark/ope/conf/reg_model_hyperparams/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | lightgbm: 3 | max_iter: 100 4 | learning_rate: 0.01 5 | max_depth: 5 6 | min_samples_leaf: 10 7 | random_state: 12345 8 | logistic_regression: 9 | max_iter: 10000 10 | C: 1000 11 | random_state: 12345 12 | random_forest: 13 | n_estimators: 100 14 | max_depth: 5 15 | min_samples_leaf: 10 16 | random_state: 12345 17 | -------------------------------------------------------------------------------- /benchmark/ope/conf/setting/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | experiment: default 3 | # dataset settings 4 | n_seeds: 200 5 | sample_size: 10000 6 | campaign: men 7 | behavior_policy: random 8 | # regression model settings 9 | reg_model: lightgbm 10 | n_folds: 2 # cross-fitting 11 | # other experimental settings 12 | is_full_obd: True 13 | is_timeseries_split: False 14 | test_size: 0.3 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /benchmark/ope/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "benchmark/ope" 3 | version = "0.1.0" 4 | description = "benchmarking OPE estimators on Open Bandit Dataset" 5 | authors = ["usaito "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.9,<3.10" 9 | scikit-learn = "^0.24.2" 10 | pandas = "^1.3.1" 11 | numpy = "^1.21.1" 12 | matplotlib = "^3.4.2" 13 | pingouin = "^0.4.0" 14 | pyieoe = "^0.1.0" 15 | obp = "^0.5.0" 16 | hydra-core = "1.0.7" 17 | 18 | [tool.poetry.dev-dependencies] 19 | flake8 = "^3.9.2" 20 | black = "^21.7b0" 21 | 22 | [build-system] 23 | requires = ["poetry-core>=1.0.0"] 24 | build-backend = "poetry.core.masonry.api" 25 | -------------------------------------------------------------------------------- /docs/_static/images/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/dataset.png -------------------------------------------------------------------------------- 
/docs/_static/images/evaluation_of_ope_algo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/evaluation_of_ope_algo.png -------------------------------------------------------------------------------- /docs/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/logo.png -------------------------------------------------------------------------------- /docs/_static/images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/overview.png -------------------------------------------------------------------------------- /docs/_static/images/recommended_fashion_items.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/recommended_fashion_items.png -------------------------------------------------------------------------------- /docs/_static/images/related_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/related_data.png -------------------------------------------------------------------------------- /docs/_static/images/related_packages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/related_packages.png -------------------------------------------------------------------------------- /docs/_static/images/statistics_of_obd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/statistics_of_obd.png -------------------------------------------------------------------------------- /docs/about.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | About 3 | =============== 4 | Motivated by the paucity of real-world data and implementation enabling the evaluation and comparison of OPE, we release the following open-source dataset and pipeline software for research uses. 5 | 6 | 7 | Open Bandit Dataset (OBD) 8 | ------------------------------ 9 | 10 | *Open Bandit Dataset* is a public real-world logged bandit feedback data. 11 | The dataset is provided by `ZOZO, Inc. `_, the largest Japanese fashion e-commerce company with over 5 billion USD market capitalization (as of May 2020). 12 | The company uses multi-armed bandit algorithms to recommend fashion items to users in a large-scale fashion e-commerce platform called `ZOZOTOWN `_. 13 | The following figure presents examples of displayed fashion items as actions. 14 | 15 | .. image:: ./_static/images/recommended_fashion_items.png 16 | :scale: 25% 17 | :align: center 18 | 19 | We collected the data in a 7-days experiment in late November 2019 on three campaigns, corresponding to "all", "men's", and "women's" items, respectively. 
20 | Each campaign randomly uses either the Random policy or the Bernoulli Thompson Sampling (Bernoulli TS) policy for each user impression. 21 | Note that we pre-trained Bernoulli TS for over a month before the data collection process and the policy well converges to a fixed one. 22 | Thus, we suppose our data is generated by a fixed policy and apply the standard OPE formulation that assumes static behavior and evaluation policies. 23 | These policies select three of the possible fashion items to each user. 24 | Let :math:`\mathcal{I}:=\{0,\ldots,n\}` be a set of :math:`n+1` items and :math:`\mathcal{K}:=\{0,\ldots,k\}` be a set of :math:`k+1` positions. 25 | The above figure shows that :math:`k+1=3` for our data. 26 | We assume that the reward (click indicator) depends only on the item and its position, which is a general assumption on the click generative model in the web industry:cite:`Li2018`. 27 | Under the assumption, the action space is simply the product of the item set and the position set, i.e., :math:`\calA = \mathcal{I} \times \mathcal{K}`. 28 | Then, we can apply the standard OPE setup and estimators to our setting. 29 | We describe some statistics of the dataset in the following. 30 | 31 | .. image:: ./_static/images/statistics_of_obd.png 32 | :scale: 25% 33 | :align: center 34 | 35 | The data is large and contains many millions of recommendation instances. 36 | It also includes the true action choice probabilities by behavior policies computed by Monte Carlo simulations based on the policy parameters (e.g., parameters of the beta distribution used by Bernoulli TS) used during the data collection process. 37 | The number of actions is also sizable, so this setting is challenging for bandit algorithms and their OPE. 38 | We share the full version of our data at https://research.zozo.com/data.html 39 | 40 | Open Bandit Pipeline (OBP) 41 | --------------------------------- 42 | 43 | *Open Bandit Pipeline* is a series of implementations of dataset preprocessing, policy learning, and evaluation of OPE estimators. 44 | This pipeline allows researchers to focus on building their bandit algorithm or OPE estimator and easily compare them with others’ methods in realistic and reproducible ways. 45 | Thus, it facilitates reproducible research on bandit algorithms and off-policy evaluation. 46 | 47 | .. image:: ./_static/images/overview.png 48 | :scale: 40% 49 | :align: center 50 | 51 | Open Bandit Pipeline consists of the following main modules. 52 | 53 | - **dataset module**: This module provides a data loader for Open Bandit Dataset and a flexible interface for handling logged bandit feedback. It also provides tools to generate synthetic bandit datasets. 54 | - **policy module**: This module provides interfaces for online and offline bandit algorithms. It also implements several standard algorithms. 55 | - **simulator module**: This module provides functions for conducting offline bandit simulation. 56 | - **ope module**: This module provides interfaces for OPE estimators. It also implements several standard OPE estimators. 57 | 58 | In addition to the above algorithms and estimators, the pipeline also provides flexible interfaces. 59 | Therefore, researchers can easily implement their own algorithms or estimators and evaluate them with our data and pipeline. 60 | Moreover, the pipeline provides an interface for handling logged bandit feedback datasets. 
61 | Thus, practitioners can combine their own datasets with the pipeline and easily evaluate bandit algorithms' performances in their settings. 62 | 63 | Please see `package reference `_ for detailed information about Open Bandit Pipeline. 64 | 65 | To our knowledge, our real-world dataset and pipeline are the first to include logged bandit datasets collected by running *multiple* different policies, policy implementations used in production, and their ground-truth policy values. 66 | These features enable the **evaluation of OPE** for the first time. 67 | -------------------------------------------------------------------------------- /docs/estimators.rst: -------------------------------------------------------------------------------- 1 | ================================================ 2 | Estimators 3 | ================================================ 4 | 5 | 6 | Direct Method (DM) 7 | -------------------------------------- 8 | A widely-used method, DM, first learns a supervised machine learning model, such as random forest, ridge regression, and gradient boosting, to estimate the mean reward function. 9 | DM then uses it to estimate the policy value as 10 | 11 | .. math:: 12 | \hat{V}_{\mathrm{DM}} (\pi_e; \calD, \hat{q}) := \E_{\calD} [ \hat{q} (x_t, \pi_e) ], 13 | 14 | where :math:`\hat{q}(a \mid x)` is the estimated reward function. 15 | If :math:`\hat{q}(a \mid x)` is a good approximation to the mean reward function, this estimator accurately estimates the policy value of the evaluation policy :math:`V^{\pi}`. 16 | If :math:`\hat{q}(a \mid x)` fails to approximate the mean reward function well, however, the final estimator is no longer consistent. 17 | The model misspecification issue is problematic because the extent of misspecification cannot be easily quantified from data :cite:`Farajtabar2018`. 18 | 19 | 20 | Inverse Probability Weighting (IPW) 21 | -------------------------------------- 22 | To alleviate the issue with DM, researchers often use another estimator called IPW :cite:`Precup2000` :cite:`Strehl2010`. 23 | IPW re-weights the rewards by the ratio of the evaluation policy and behavior policy as 24 | 25 | .. math:: 26 | \hat{V}_{\mathrm{IPW}} (\pi_e; \calD) := \E_{\calD} [w(x_t,a_t) r_t ], 27 | 28 | where :math:`w(x,a) := \pi_e(a \mid x) / \pi_b(a \mid x)` is the importance weight given :math:`x` and :math:`a`. 29 | When the behavior policy is known, the IPW estimator is unbiased and consistent for the policy value. 30 | However, it can have a large variance, especially when the evaluation policy significantly deviates from the behavior policy. 31 | 32 | 33 | Doubly Robust (DR) 34 | -------------------------------------- 35 | The final approach is DR :cite:`Dudik2014`, which combines the above two estimators as 36 | 37 | .. math:: 38 | \hat{V}_{\mathrm{DR}} := \E_{\calD} [ \hat{q} (x_t, \pi_e) + w(x_t,a_t) (r_t-\hat{q}(x_t, a_t) ) ]. 39 | 40 | DR mimics IPW to use a weighted version of rewards, but DR also uses the estimated mean reward function as a control variate to decrease the variance. 41 | It preserves the consistency of IPW if either the importance weight or the mean reward estimator is accurate (a property called *double robustness*). 42 | Moreover, DR is *semiparametric efficient* :cite:`Narita2019` when the mean reward estimator is correctly specified. 43 | On the other hand, when it is wrong, this estimator can have larger asymptotic mean-squared-error than IPW :cite:`Kallus2019` and perform poorly in practice :cite:`Kang2007`. 
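The three estimators above can be written directly from their definitions. The following NumPy sketch is a simplified illustration (a single position, a known behavior policy, and a precomputed reward model :math:`\hat{q}`), not the implementation provided in ``obp.ope``:

.. code-block:: python

    import numpy as np

    def dm_ipw_dr(reward, action, pscore, action_dist, q_hat):
        """Return the DM, IPW, and DR estimates of the policy value.

        reward:      (n, )   observed rewards r_t
        action:      (n, )   logged actions a_t
        pscore:      (n, )   behavior policy probabilities pi_b(a_t | x_t)
        action_dist: (n, m)  evaluation policy pi_e(a | x_t) for all m actions
        q_hat:       (n, m)  estimated mean rewards q_hat(x_t, a)
        """
        idx = np.arange(reward.shape[0])
        # importance weights w(x_t, a_t) = pi_e(a_t | x_t) / pi_b(a_t | x_t)
        iw = action_dist[idx, action] / pscore
        # q_hat(x_t, pi_e) = sum_a pi_e(a | x_t) * q_hat(x_t, a)
        q_pi_e = (action_dist * q_hat).sum(axis=1)
        dm = q_pi_e.mean()
        ipw = (iw * reward).mean()
        dr = (q_pi_e + iw * (reward - q_hat[idx, action])).mean()
        return dm, ipw, dr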
44 | 45 | 46 | 47 | Self-Normalized Estimators 48 | -------------------------------------- 49 | Self-Normalized Inverse Probability Weighting (SNIPW) is an approach to address the variance issue with the original IPW. 50 | It estimates the policy value by dividing the sum of weighted rewards by the sum of importance weights as: 51 | 52 | .. math:: 53 | \hat{V}_{\mathrm{SNIPW}} (\pi_e; \calD) :=\frac{\E_{\calD} [ w(x_t,a_t) r_t ]}{\E_{\calD} [ w(x_t,a_t) ]}. 54 | 55 | SNIPW is more stable than IPW, because estimated policy value by SNIPW is bounded in the support of rewards and its conditional variance given action and context is bounded by the conditional variance of the rewards:cite:`kallus2019`. 56 | IPW does not have these properties. 57 | We can define Self-Normalized Doubly Robust (SNDR) in a similar manner as follows. 58 | 59 | .. math:: 60 | \hat{V}_{\mathrm{SNDR}} (\pi_e; \calD) := \E_{\calD} \left[\hat{q}(x_t, \pi_e) + \frac{w(x_t,a_t) (r_t-\hat{q}(x_t, a_t) )}{\E_{\calD} [ w(x_t,a_t) ]} \right]. 61 | 62 | 63 | Switch Estimators 64 | -------------------------------------- 65 | The DR estimator can still be subject to the variance issue, particularly when the importance weights are large due to low overlap. 66 | Switch-DR aims to reduce the effect of the variance issue by using DM where importance weights are large as: 67 | 68 | .. math:: 69 | \hat{V}_{\mathrm{SwitchDR}} (\pi_e; \calD, \hat{q}, \tau) := \E_{\calD} \left[ \hat{q}(x_t, \pi_e) + w(x_t,a_t) (r_t-\hat{q}(x_t, a_t) ) \mathbb{I}\{ w(x_t,a_t) \le \tau \} \right], 70 | 71 | where :math:`\mathbb{I} \{\cdot\}` is the indicator function and :math:`\tau \ge 0` is a hyperparameter. 72 | Switch-DR interpolates between DM and DR. 73 | When :math:`\tau=0`, it coincides with DM, while :math:`\tau \to \infty` yields DR. 74 | This estimator is minimax optimal when :math:`\tau` is appropriately chosen :cite:`Wang2016`. 75 | 76 | 77 | More Robust Doubly Robust (MRDR) 78 | -------------------------------------- 79 | MRDR uses a specialized reward estimator (:math:`\hat{q}_{\mathrm{MRDR}}`) that minimizes the variance of the resulting policy value estimator:cite:`Farajtabar2018`. 80 | This estimator estimates the policy value as: 81 | 82 | .. math:: 83 | \hat{V}_{\mathrm{MRDR}} (\pi_e; \calD, \hat{q}_{\mathrm{MRDR}}) := \hat{V}_{\mathrm{DR}} (\pi_e; \calD, \hat{q}_{\mathrm{MRDR}}), 84 | 85 | where :math:`\mathcal{Q}` is a function class for the reward estimator. 86 | When :math:`\mathcal{Q}` is well-specified, then :math:`\hat{q}_{\mathrm{MRDR}} = q`. 87 | Here, even if :math:`\mathcal{Q}` is misspecified, the derived reward estimator is expected to behave well since the target function is the resulting variance. 88 | 89 | 90 | Doubly Robust with Optimistic Shrinkage (DRos) 91 | ---------------------------------------------------------------------------- 92 | :cite:`Su2019` proposes DRs based on a new weight function :math:`w_o: \calX \times \calA \rightarrow \mathbb{R}_{+}` that directly minimizes sharp bounds on the MSE of the resulting estimator. 93 | DRs is defined as 94 | 95 | .. math:: 96 | \hat{V}_{\mathrm{DRs}} (\pi_e; \calD, \hat{q}, \lambda) := \E_{\calD} [ \hat{q} (x_t, \pi_e) + w_o (x_t, a_t; \lambda) (r_t-\hat{q}(x_t, a_t) ) ], 97 | 98 | where :math:`\lambda \ge 0` is a hyperparameter and the new weight is 99 | 100 | .. math:: 101 | w_o (x, a; \lambda) := \frac{\lambda}{w^{2}(x, a)+\lambda} w(x, a). 102 | 103 | When :math:`\lambda = 0`, :math:`w_o (x, a; \lambda) = 0` leading to the standard DM. 
104 | On the other hand, as :math:`\lambda \rightarrow \infty`, :math:`w_o (x, a; \lambda) = w(x,a)` leading to the original DR. 105 | -------------------------------------------------------------------------------- /docs/evaluation_ope.rst: -------------------------------------------------------------------------------- 1 | ================================================ 2 | Evaluation of OPE 3 | ================================================ 4 | Here we describe an experimental protocol to evaluate OPE estimators and use it to compare a wide variety of existing estimators. 5 | 6 | We can empirically evaluate OPE estimators' performances by using two sources of logged bandit feedback collected by two different policies :math:`\pi^{(he)}` (hypothetical evaluation policy) and :math:`\pi^{(hb)}` (hypothetical behavior policy). 7 | We denote log data generated by :math:`\pi^{(he)}` and :math:`\pi^{(hb)}` as :math:`\calD^{(he)} := \{ (x^{(he)}_t, a^{(he)}_t, r^{(he)}_t) \}_{t=1}^T` and :math:`\calD^{(hb)} := \{ (x^{(hb)}_t, a^{(hb)}_t, r^{(hb)}_t) \}_{t=1}^T`, respectively. 8 | By applying the following protocol to several different OPE estimators, we can compare their estimation performances: 9 | 10 | 11 | 1. Define the evaluation and test sets as: 12 | 13 | * in-sample case: :math:`\calD_{\mathrm{ev}} := \calD^{(hb)}_{1:T}`, :math:`\calD_{\mathrm{te}} := \calD^{(he)}_{1:T}` 14 | * out-sample case: :math:`\calD_{\mathrm{ev}} := \calD^{(hb)}_{1:\tilde{t}}`, :math:`\calD_{\mathrm{te}} := \calD^{(he)}_{\tilde{t}+1:T}` 15 | 16 | where :math:`\calD_{a:b} := \{ (x_t,a_t,r_t) \}_{t=a}^{b}`. 17 | 18 | 2. Estimate the policy value of :math:`\pi^{(he)}` using :math:`\calD_{\mathrm{ev}}` by an estimator :math:`\hat{V}`. We can represent an estimated policy value by :math:`\hat{V}` as :math:`\hat{V} (\pi^{(he)}; \calD_{\mathrm{ev}})`. 19 | 20 | 3. Estimate :math:`V(\pi^{(he)})` by the *on-policy estimation* and regard it as the ground-truth as 21 | 22 | .. math:: 23 | V_{\mathrm{on}} (\pi^{(he)}; \calD_{\mathrm{te}}) := \E_{\calD_{\mathrm{te}}} [r^{(he)}_t]. 24 | 25 | 4. Compare the off-policy estimate :math:`\hat{V}(\pi^{(he)}; \calD_{\mathrm{ev}})` with its ground-truth :math:`V_{\mathrm{on}} (\pi^{(he)}; \calD_{\mathrm{te}})`. We can evaluate the estimation accuracy of :math:`\hat{V}` by the following *relative estimation error* (relative-EE): 26 | 27 | .. math:: 28 | \textit{relative-EE} (\hat{V}; \calD_{\mathrm{ev}}) := \left| \frac{\hat{V} (\pi^{(he)}; \calD_{\mathrm{ev}}) - V_{\mathrm{on}} (\pi^{(he)}; \calD_{\mathrm{te}}) }{V_{\mathrm{on}} (\pi^{(he)}; \calD_{\mathrm{te}})} \right|. 29 | 30 | 5. To estimate standard deviation of relative-EE, repeat the above process several times with different bootstrap samples of the logged bandit data created by sampling data *with replacement* from :math:`\calD_{\mathrm{ev}}`. 31 | 32 | We call the problem setting **without** the sample splitting by time series as in-sample case. 33 | In contrast, we call that **with** the sample splitting as out-sample case where OPE estimators aim to estimate the policy value of an evaluation policy in the test data. 34 | 35 | The following algorithm describes the detailed experimental protocol to evaluate OPE estimators. 36 | 37 | .. image:: ./_static/images/evaluation_of_ope_algo.png 38 | :scale: 45% 39 | :align: center 40 | 41 | 42 | Using the above protocol, our real-world data, and pipeline, we have performed extensive benchmark experiments on a variety of existing off-policy estimators. 
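As a concrete reference for steps 4 and 5 of the protocol, relative-EE and its bootstrap mean and standard deviation reduce to the following computation (a sketch with made-up numbers for illustration; this is not code from the benchmark scripts):

.. code-block:: python

    import numpy as np

    def relative_ee(v_hat, v_on):
        """Relative estimation error of an off-policy estimate v_hat
        against the on-policy ground-truth v_on (step 4)."""
        return np.abs((v_hat - v_on) / v_on)

    # OPE estimates obtained from different bootstrap samples of D_ev (step 5)
    v_hat_boot = np.array([0.0041, 0.0048, 0.0044])  # made-up estimates
    v_on = 0.0045                                    # made-up on-policy ground truth
    errors = relative_ee(v_hat_boot, v_on)
    print(errors.mean(), errors.std())  # mean and std of relative-EE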
43 | The experimental results and the relevant discussion can be found in `our paper `_. 44 | The code for running the benchmark experiments can be found at `zr-obp/benchmark/ope `_. 45 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. obp documentation master file, created by 2 | sphinx-quickstart on Tue Jun 23 17:55:21 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. image:: ./_static/images/logo.png 7 | :scale: 20% 8 | :align: center 9 | 10 | Open Bandit Pipeline; a python library for bandit algorithms and off-policy evaluation 11 | ========================================================================================= 12 | 13 | Overview 14 | ~~~~~~~~~~~~ 15 | 16 | *Open Bandit Pipeline (OBP)* is an open source python library for bandit algorithms and off-policy evaluation (OPE). 17 | The toolkit comes with the *Open Bandit Dataset* , a large-scale logged bandit feedback data collected on a fashion e-commerce platform, `ZOZOTOWN `_. 18 | The purpose of the open data and library is to enable easy, realistic, and reproducible evaluation of bandit algorithms and OPE. 19 | OBP has a series of implementations of dataset preprocessing, bandit policy interfaces, and a variety of OPE estimators. 20 | 21 | Our open data and pipeline facilitate evaluation and comparison related to the following research topics. 22 | 23 | * **Bandit Algorithms**: Our data include the probabilities of each action being selected by behavior policies (the true propensity scores). 24 | Therefore, it enables the evaluation of new online bandit algorithms, including contextual and combinatorial algorithms, in a large real-world setting. 25 | 26 | 27 | * **Off-Policy Evaluation**: We present implementations of behavior policies used when collecting datasets as a part of our pipeline. 28 | Our open data also contains logged bandit feedback data generated by multiple behavior policies. 29 | Therefore, it enables the evaluation of off-policy evaluation with ground-truths for the performances of evaluation policies. 30 | 31 | This website contains pages with example analyses to help demonstrate the usage of this library. 32 | Additionally, it presents examples of evaluating counterfactual bandit algorithms and OPE itself. 33 | The reference page contains the full reference documentation for the current functions of this toolkit. 
34 | 35 | Algorithms and OPE Estimators Supported 36 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 37 | 38 | Bandit Algorithms 39 | ---------------------- 40 | 41 | * Online 42 | 43 | * Context-free 44 | 45 | * Random 46 | * Epsilon Greedy 47 | * Bernoulli Thompson Sampling 48 | 49 | * Contextual (Linear) 50 | 51 | * Linear Epsilon Greedy 52 | * Linear Thompson Sampling :cite:`Agrawal2013` 53 | * Linear Upper Confidence Bound :cite:`Li2010` 54 | 55 | * Contextual (Logistic) 56 | 57 | * Logistic Epsilon Greedy 58 | * Logistic Thompson Sampling :cite:`Chapelle2011` 59 | * Logistic Upper Confidence Bound :cite:`Mahajan2012` 60 | 61 | * Offline (Off-Policy Learning) :cite:`Dudik2014` 62 | 63 | * Inverse Probability Weighting 64 | 65 | OPE Estimators 66 | ---------------------- 67 | * Replay Method (RM) :cite:`Li2012` 68 | * Direct Method (DM) :cite:`Beygelzimer2009` 69 | * Inverse Probability Weighting (IPW) :cite:`Precup2000` :cite:`Strehl2010` 70 | * Self-Normalized Inverse Probability Weighting (SNIPW) :cite:`Swaminathan2015b` 71 | * Doubly Robust (DR) :cite:`Dudik2014` 72 | * Switch Estimators :cite:`Wang2016` 73 | * Doubly Robust with Optimistic Shrinkage (DRos) :cite:`Su2019` 74 | * More Robust Doubly Robust (MRDR) :cite:`Farajtabar2018` 75 | * Double Machine Learning (DML) :cite:`Narita2020` 76 | 77 | 78 | Citation 79 | ~~~~~~~~~~~~ 80 | If you use our dataset and pipeline in your work, please cite our paper below. 81 | 82 | ``` 83 | @article{saito2020open, 84 | title={Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation}, 85 | author={Saito, Yuta and Shunsuke, Aihara and Megumi, Matsutani and Yusuke, Narita}, 86 | journal={arXiv preprint arXiv:2008.07146}, 87 | year={2020} 88 | } 89 | ``` 90 | 91 | Google Group 92 | ~~~~~~~~~~~~~~~~ 93 | If you are interested in the Open Bandit Project, we can follow the updates at its google group: https://groups.google.com/g/open-bandit-project 94 | 95 | Contact 96 | ~~~~~~~~~~~~ 97 | For any question about the paper, data, and pipeline, feel free to contact: saito@hanjuku-kaso.com 98 | 99 | Contribution 100 | ~~~~~~~~~~~~~~ 101 | Any contributions to the Open Bandit Pipeline are more than welcome! 102 | Please refer to `CONTRIBUTING.md `_ for general guidelines how to contribute to the project. 103 | 104 | 105 | Table of Contents 106 | ~~~~~~~~~~~~~~~~~~~~~~~~ 107 | 108 | .. toctree:: 109 | :maxdepth: 3 110 | :caption: Introduction: 111 | 112 | about 113 | related 114 | 115 | .. toctree:: 116 | :maxdepth: 3 117 | :caption: Off-Policy Evaluation (OPE): 118 | 119 | ope 120 | estimators 121 | evaluation_ope 122 | 123 | .. toctree:: 124 | :maxdepth: 3 125 | :caption: Getting Started: 126 | 127 | installation 128 | quickstart 129 | 130 | .. toctree:: 131 | :maxdepth: 3 132 | :caption: Package Reference: 133 | 134 | obp 135 | 136 | .. toctree:: 137 | :caption: Others: 138 | 139 | Github 140 | Dataset 141 | Google Group 142 | LICENSE 143 | references 144 | 145 | 146 | 147 | Indices and tables 148 | ================== 149 | 150 | * :ref:`genindex` 151 | * :ref:`modindex` 152 | * :ref:`search` 153 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | 2 | ============ 3 | Installation 4 | ============ 5 | 6 | 7 | ``obp`` is available on PyPI, and can be installed from ``pip`` or source as follows: 8 | 9 | From ``pip``: 10 | 11 | .. 
code-block:: bash 12 | 13 | pip install obp 14 | 15 | From source: 16 | 17 | .. code-block:: bash 18 | 19 | git clone https://github.com/st-tech/zr-obp 20 | cd zr-obp 21 | python setup.py install 22 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/obp.rst: -------------------------------------------------------------------------------- 1 | OBP Package Reference 2 | ======================= 3 | 4 | ope module 5 | ------------ 6 | .. autosummary:: 7 | :toctree: _autosummary 8 | 9 | obp.ope.estimators 10 | obp.ope.meta 11 | obp.ope.regression_model 12 | 13 | 14 | policy module 15 | --------------- 16 | .. autosummary:: 17 | :toctree: _autosummary 18 | 19 | obp.policy.base 20 | obp.policy.contextfree 21 | obp.policy.linear 22 | obp.policy.logistic 23 | obp.policy.offline 24 | 25 | 26 | dataset module 27 | --------------- 28 | .. autosummary:: 29 | :toctree: _autosummary 30 | 31 | obp.dataset.base 32 | obp.dataset.real 33 | obp.dataset.synthetic 34 | obp.dataset.multiclass 35 | 36 | 37 | simulator module 38 | ------------------ 39 | .. autosummary:: 40 | :toctree: _autosummary 41 | 42 | obp.simulator.simulator 43 | 44 | 45 | others 46 | --------------- 47 | .. autosummary:: 48 | :toctree: _autosummary 49 | 50 | obp.utils 51 | -------------------------------------------------------------------------------- /docs/ope.rst: -------------------------------------------------------------------------------- 1 | ================================================ 2 | Overview 3 | ================================================ 4 | 5 | 6 | Setup 7 | ------ 8 | 9 | We consider a general contextual bandit setting. 10 | Let :math:`r \in [0, R_{\mathrm{max}}]` denote a reward or outcome variable (e.g., whether a fashion item as an action results in a click). 11 | We let :math:`x \in \calX` be a context vector (e.g., the user's demographic profile) that the decision maker observes when picking an action. 12 | Rewards and contexts are sampled from the unknown probability distributions :math:`p (r \mid x, a)` and :math:`p(x)`, respectively. 13 | Let :math:`\calA:=\{0,\ldots,m\}` be a finite set of :math:`m+1` actions. 14 | We call a function :math:`\pi: \calX \rightarrow \Delta(\calA)` a *policy*. 15 | It maps each context :math:`x \in \calX` into a distribution over actions, where :math:`\pi (a \mid x)` is the probability of taking action :math:`a` given :math:`x`. 
16 | 17 | Let :math:`\calD := \{(x_t,a_t,r_t)\}_{t=1}^{T}` be historical logged bandit feedback with :math:`T` rounds of observations. 18 | :math:`a_t` is a discrete variable indicating which action in :math:`\calA` is chosen in round :math:`t`. 19 | :math:`r_t` and :math:`x_t` denote the reward and the context observed in round :math:`t`, respectively. 20 | We assume that the logged bandit feedback is generated by a behavior policy :math:`\pi_b` as follows: 21 | 22 | .. math:: 23 | \{(x_t,a_t,r_t)\}_{t=1}^{T} \sim \prod_{t=1}^{T} p(x_t) \pi_b (a_t \mid x_t) p(r_t \mid x_t, a_t), 24 | 25 | where each context-action-reward triplet is sampled independently from the product distribution. 26 | Note that we assume :math:`a_t` is independent of :math:`r_t` conditional on :math:`x_t`. 27 | 28 | We let :math:`\pi(x,a,r) := p(x) \pi (a \mid x) p(r \mid x, a)` be the product distribution induced by a policy :math:`\pi`. 29 | For a function :math:`f(x,a,r)`, we use :math:`\E_{\calD} [f] := |\calD|^{-1} \sum_{(x_t, a_t, r_t) \in \calD} f(x_t, a_t, r_t)` to denote its empirical expectation over :math:`T` observations in :math:`\calD`. 30 | Then, for a function :math:`g(x,a)`, we let :math:`g(x,\pi) := \E_{a \sim \pi(a|x)}[g(x,a) \mid x]`. 31 | We also use :math:`q(x,a) := \E_{r \sim p(r|x,a)} [ r \mid x, a ]` to denote the mean reward function. 32 | 33 | 34 | Estimation Target 35 | ------------------------- 36 | We are interested in using the historical logged bandit data to estimate the following *policy value* of any given *evaluation policy* :math:`\pi_e`, which might be different from :math:`\pi_b`: 37 | 38 | .. math:: 39 | V (\pi_e) := \E_{(x,a,r) \sim \pi_e (x,a,r)} [r], 40 | 41 | where the expectation is taken over the product distribution induced by the evaluation policy, i.e., :math:`\pi_e (x,a,r) := p(x) \pi_e (a \mid x) p(r \mid x, a)`. 42 | We allow the evaluation policy :math:`\pi_e` to be degenerate, i.e., it may choose a particular action with probability 1. 43 | Estimating :math:`V(\pi_e)` before implementing :math:`\pi_e` in an online environment is valuable because :math:`\pi_e` may perform poorly and damage user satisfaction. 44 | Additionally, it is possible to select the evaluation policy that maximizes the policy value by comparing the estimated performances of candidate policies, without incurring additional implementation cost. 45 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Quickstart 3 | ============ 4 | 5 | We show an example of conducting offline evaluation of the performance of Bernoulli Thompson Sampling (BernoulliTS) as an evaluation policy using *Inverse Probability Weighting (IPW)* 6 | and logged bandit feedback generated by the Random policy (behavior policy). 7 | We see that only ten lines of code are sufficient to complete OPE from scratch. 8 | In this example, it is assumed that the `obd/random/all` directory exists under the present working directory. 9 | Please clone `the repository `_ in advance. 10 | 11 | .. 
code-block:: python 12 | 13 | # a case for implementing OPE of the BernoulliTS policy using log data generated by the Random policy 14 | >>> from obp.dataset import OpenBanditDataset 15 | >>> from obp.policy import BernoulliTS 16 | >>> from obp.ope import OffPolicyEvaluation, InverseProbabilityWeighting as IPW 17 | 18 | # (1) Data loading and preprocessing 19 | >>> dataset = OpenBanditDataset(behavior_policy='random', campaign='all') 20 | >>> bandit_feedback = dataset.obtain_batch_bandit_feedback() 21 | 22 | # (2) Off-Policy Learning 23 | >>> evaluation_policy = BernoulliTS( 24 | n_actions=dataset.n_actions, 25 | len_list=dataset.len_list, 26 | is_zozotown_prior=True, 27 | campaign="all", 28 | random_state=12345 29 | ) 30 | >>> action_dist = evaluation_policy.compute_batch_action_dist( 31 | n_sim=100000, n_rounds=bandit_feedback["n_rounds"] 32 | ) 33 | 34 | # (3) Off-Policy Evaluation 35 | >>> ope = OffPolicyEvaluation(bandit_feedback=bandit_feedback, ope_estimators=[IPW()]) 36 | >>> estimated_policy_value = ope.estimate_policy_values(action_dist=action_dist) 37 | 38 | # estimated performance of BernoulliTS relative to the ground-truth performance of Random 39 | >>> relative_policy_value_of_bernoulli_ts = estimated_policy_value['ipw'] / bandit_feedback['reward'].mean() 40 | >>> print(relative_policy_value_of_bernoulli_ts) 41 | 1.198126... 42 | 43 | A detailed introduction with the same example can be found at `quickstart `_. 44 | Below, we explain some important features in the example flow. 45 | 46 | 47 | Data loading and preprocessing 48 | ------------------------------------ 49 | 50 | We prepare an easy-to-use data loader for Open Bandit Dataset. 51 | 52 | .. code-block:: python 53 | 54 | # load and preprocess raw data in "ALL" campaign collected by the Random policy 55 | >>> dataset = OpenBanditDataset(behavior_policy='random', campaign='all') 56 | # obtain logged bandit feedback generated by the behavior policy 57 | >>> bandit_feedback = dataset.obtain_batch_bandit_feedback() 58 | 59 | >>> print(bandit_feedback.keys()) 60 | dict_keys(['n_rounds', 'n_actions', 'action', 'position', 'reward', 'pscore', 'context', 'action_context']) 61 | 62 | Users can implement their own feature engineering in the :class:`pre_process` method of :class:`obp.dataset.OpenBanditDataset` class. 63 | We show an example of implementing some new feature engineering processes in `custom_dataset.py `_. 64 | 65 | Moreover, by following the interface of :class:`obp.dataset.BaseBanditDataset` class, one can handle their own or future open datasets for bandit algorithms other than our OBD. 66 | 67 | Off-Policy Learning 68 | ------------------------------ 69 | 70 | After preparing a dataset, we now compute the action choice probability of BernoulliTS in the ZOZOTOWN production. 71 | Then, we can use it as the evaluation policy. 72 | 73 | .. code-block:: python 74 | 75 | # define evaluation policy (the Bernoulli TS policy here) 76 | # by activating the `is_zozotown_prior` argument of BernoulliTS, we can replicate BernoulliTS used in ZOZOTOWN production. 
77 | >>> evaluation_policy = BernoulliTS( 78 | n_actions=dataset.n_actions, 79 | len_list=dataset.len_list, 80 | is_zozotown_prior=True, # replicate the policy in the ZOZOTOWN production 81 | campaign="all", 82 | random_state=12345 83 | ) 84 | # compute the distribution over actions by the evaluation policy using Monte Carlo simulation 85 | # action_dist is an array of shape (n_rounds, n_actions, len_list) 86 | # representing the distribution over actions made by the evaluation policy 87 | >>> action_dist = evaluation_policy.compute_batch_action_dist( 88 | n_sim=100000, n_rounds=bandit_feedback["n_rounds"] 89 | ) 90 | 91 | The :class:`compute_batch_action_dist` method of :class:`BernoulliTS` computes the action choice probabilities based on the given hyperparameters of the beta distribution. 92 | :class:`action_dist` is an array representing the distribution over actions made by the evaluation policy. 93 | 94 | 95 | Off-Policy Evaluation 96 | ------------------------------ 97 | 98 | Our final step is **off-policy evaluation** (OPE), which attempts to estimate the performance of a decision making policy using log data generated by a different policy (here, the behavior policy). 99 | Our pipeline also provides an easy procedure for doing OPE as follows. 100 | 101 | .. code-block:: python 102 | 103 | # estimate the policy value of BernoulliTS based on the distribution over actions by that policy 104 | # it is possible to set multiple OPE estimators to the `ope_estimators` argument 105 | >>> ope = OffPolicyEvaluation(bandit_feedback=bandit_feedback, ope_estimators=[IPW()]) 106 | >>> estimated_policy_value = ope.estimate_policy_values(action_dist=action_dist) 107 | >>> print(estimated_policy_value) 108 | {'ipw': 0.004553...} # dictionary containing estimated policy values by each OPE estimator. 109 | 110 | # compare the estimated performance of BernoulliTS (evaluation policy) 111 | # with the ground-truth performance of Random (behavior policy) 112 | >>> relative_policy_value_of_bernoulli_ts = estimated_policy_value['ipw'] / bandit_feedback['reward'].mean() 113 | # our OPE procedure suggests that BernoulliTS improves Random by 19.81% 114 | >>> print(relative_policy_value_of_bernoulli_ts) 115 | 1.198126... 116 | 117 | Users can implement their own OPE estimator by following the interface of :class:`obp.ope.BaseOffPolicyEstimator` class. 118 | :class:`obp.ope.OffPolicyEvaluation` class summarizes and compares the policy values estimated by several off-policy estimators. 119 | A detailed usage of this class can be found at `quickstart `_. 120 | :class:`bandit_feedback['reward'].mean()` is the empirical mean of factual rewards (the on-policy estimate of the policy value) in the log and thus is the ground-truth performance of the behavior policy (the Random policy in this example). 121 | -------------------------------------------------------------------------------- /docs/references.rst: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | 4 | 5 | Papers 6 | ------ 7 | 8 | .. bibliography:: refs.bib 9 | :style: unsrt 10 | 11 | 12 | Projects 13 | ---------- 14 | 15 | This project is strongly inspired by **Open Graph Benchmark**, a collection of benchmark datasets, data loaders, and evaluators for graph machine learning: 16 | `[github] `_ `[project page] `_ `[paper] `_. 
17 | -------------------------------------------------------------------------------- /docs/related.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Related Resources 3 | =================== 4 | We summarize existing related resources for bandit algorithms and off-policy evaluation. 5 | 6 | 7 | Related Datasets 8 | -------------------- 9 | Our dataset is most closely related to those of :cite:`Lefortier2016` and :cite:`Li2010`. 10 | :cite:`Lefortier2016` introduces a large-scale logged bandit feedback dataset (Criteo data) from Criteo, a leading company in display advertising. 11 | The data contains context vectors of user impressions, advertisements (ads) as actions, and click indicators as rewards. 12 | It also provides the ex ante probability of each ad being selected by the behavior policy. 13 | Therefore, this data can be used to compare different *off-policy learning* methods, which aim to learn a new bandit policy using only log data generated by a behavior policy. 14 | In contrast, :cite:`Li2010` introduces a dataset (Yahoo! data) collected on a news recommendation interface of the Yahoo! Today Module. 15 | The data contains context vectors of user impressions, presented news as actions, and click indicators as rewards. 16 | It was collected by running a uniform random policy on the news recommendation platform, allowing researchers to evaluate their own bandit algorithms. 17 | 18 | However, the Criteo and Yahoo! data have limitations, which we overcome as follows: 19 | 20 | * The previous datasets do not provide the code (production implementation) of their behavior policy. Moreover, the data was collected by running only a single behavior policy. As a result, these data cannot be used for the evaluation and comparison of different OPE estimators. 21 | 22 | :math:`\rightarrow` In contrast, we provide the code of our behavior policies (i.e., Bernoulli TS and Random) in our pipeline, which allows researchers to re-run the same behavior policies on the log data. Our open data also contains logged bandit feedback data generated by *multiple* behavior policies. It enables the evaluation and comparison of different OPE estimators. This is the first large-scale bandit dataset that enables such evaluation of OPE with the ground-truth policy value of behavior policies. 23 | 24 | * The previous datasets do not provide a pipeline implementation to handle their data. Researchers have to re-implement the experimental environment by themselves before implementing their own methods. This may lead to inconsistent experimental conditions across different studies, potentially causing reproducibility issues. 25 | 26 | :math:`\rightarrow` We implement the Open Bandit Pipeline to simplify and standardize the experimental processing of bandit algorithms and OPE with our open data. This tool thus contributes to the reproducible and transparent use of our data. 27 | 28 | The following table summarizes key differences between our data and existing ones. 29 | 30 | .. image:: ./_static/images/related_data.png 31 | :scale: 40% 32 | :align: center 33 | 34 | Related Packages 35 | ------------------- 36 | There are several existing Python packages related to our Open Bandit Pipeline. 37 | For example, the *contextualbandits* package (https://github.com/david-cortes/contextualbandits) contains implementations of several contextual bandit algorithms :cite:`Cortes2018`. 
38 | It aims to provide an easy procedure for comparing bandit algorithms and reproducing research papers that do not provide easily available implementations. 39 | In addition, *RecoGym* (https://github.com/criteo-research/reco-gym) focuses on providing simulation bandit environments imitating the e-commerce recommendation setting :cite:`Rohde2018`. 40 | This package also implements an online bandit algorithm based on epsilon greedy and an off-policy learning method based on IPW. 41 | 42 | However, the following features differentiate our pipeline from the previous ones: 43 | 44 | * The previous packages focus on implementing and comparing online bandit algorithms or off-policy learning methods. However, they **cannot** be used to implement and compare off-policy evaluation methods. 45 | 46 | :math:`\rightarrow` Our package implements a wide variety of OPE estimators including advanced ones such as Switch Estimators :cite:`Wang2016`, More Robust Doubly Robust :cite:`Farajtabar2018`, and Doubly Robust with Shrinkage :cite:`Su2019`. Moreover, it is possible to compare the estimation accuracies of these estimators with our package in a fair manner. Our package also provides flexible interfaces for implementing new OPE estimators. Thus, researchers can easily compare their own estimators with other methods using our package. 47 | 48 | * The previous packages cannot handle real-world bandit datasets. 49 | 50 | :math:`\rightarrow` Our package comes with the Open Bandit Dataset and includes the **dataset module**. This enables the evaluation of bandit algorithms and off-policy estimators using our real-world data. This function contributes to realistic experiments on these topics. 51 | 52 | The following table summarizes key differences between our pipeline and existing ones. 53 | 54 | .. image:: ./_static/images/related_packages.png 55 | :scale: 40% 56 | :align: center 57 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Readthedocs requirements 2 | sphinx_rtd_theme 3 | sphinxcontrib-bibtex 4 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Open Bandit Pipeline Examples 2 | 3 | This page contains a list of examples written with Open Bandit Pipeline. 4 | 5 | - [`obd/`](./obd/): example implementations for evaluating standard off-policy estimators with the small sample Open Bandit Dataset. 6 | - [`synthetic/`](./synthetic/): example implementations for evaluating several off-policy estimators with synthetic bandit datasets. 7 | - [`multiclass/`](./multiclass/): example implementations for evaluating several off-policy estimators with multi-class classification datasets. 8 | - [`replay/`](./replay/): example implementations for evaluating the Replay Method with online bandit algorithms. 9 | - [`opl/`](./opl/): example implementations for comparing the performance of several off-policy learners with synthetic bandit datasets. 10 | - [`quickstart/`](./quickstart/): some quickstart notebooks to guide the usage of Open Bandit Pipeline. 
11 | -------------------------------------------------------------------------------- /examples/multiclass/README.md: -------------------------------------------------------------------------------- 1 | # Example Experiment with Multi-class Classification Data 2 | 3 | 4 | ## Description 5 | 6 | We use multi-class classification datasets to evaluate OPE estimators. Specifically, we evaluate the estimation performance of some well-known OPE estimators using the ground-truth policy value of an evaluation policy calculable with multi-class classification data. 7 | 8 | ## Evaluating Off-Policy Estimators 9 | 10 | In the following, we evaluate the estimation performance of 11 | 12 | - Direct Method (DM) 13 | - Inverse Probability Weighting (IPW) 14 | - Self-Normalized Inverse Probability Weighting (SNIPW) 15 | - Doubly Robust (DR) 16 | - Self-Normalized Doubly Robust (SNDR) 17 | - Switch Doubly Robust (Switch-DR) 18 | - Doubly Robust with Optimistic Shrinkage (DRos) 19 | 20 | For Switch-DR and DRos, we tune the built-in hyperparameters using SLOPE (Su et al., 2020; Tucker et al., 2021), a data-driven hyperparameter tuning method for OPE estimators. 21 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators. 22 | 23 | ### Files 24 | - [`./evaluate_off_policy_estimators.py`](./evaluate_off_policy_estimators.py) implements the evaluation of OPE estimators using multi-class classification data. 25 | - [`./conf/hyperparams.yaml`](./conf/hyperparams.yaml) defines hyperparameters of some ML methods used to define regression model. 26 | 27 | ### Scripts 28 | 29 | ```bash 30 | # run evaluation of OPE estimators with multi-class classification data 31 | python evaluate_off_policy_estimators.py\ 32 | --n_runs $n_runs\ 33 | --dataset_name $dataset_name \ 34 | --eval_size $eval_size \ 35 | --base_model_for_behavior_policy $base_model_for_behavior_policy\ 36 | --alpha_b $alpha_b \ 37 | --base_model_for_evaluation_policy $base_model_for_evaluation_policy\ 38 | --alpha_e $alpha_e \ 39 | --base_model_for_reg_model $base_model_for_reg_model\ 40 | --n_jobs $n_jobs\ 41 | --random_state $random_state 42 | ``` 43 | - `$n_runs` specifies the number of simulation runs in the experiment to estimate standard deviations of the performance of OPE estimators. 44 | - `$dataset_name` specifies the name of the multi-class classification dataset and should be one of "breast_cancer", "digits", "iris", or "wine". 45 | - `$eval_size` specifies the proportion of the dataset to include in the evaluation split. 46 | - `$base_model_for_behavior_policy` specifies the base ML model for defining behavior policy and should be one of "logistic_regression", "random_forest", or "lightgbm". 47 | - `$alpha_b`: specifies the ratio of a uniform random policy when constructing a behavior policy. 48 | - `$base_model_for_evaluation_policy` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm". 49 | - `$alpha_e`: specifies the ratio of a uniform random policy when constructing an evaluation policy. 50 | - `$base_model_for_reg_model` specifies the base ML model for defining regression model and should be one of "logistic_regression", "random_forest", or "lightgbm". 51 | - `$n_jobs` is the maximum number of concurrently running jobs. 52 | 53 | For example, the following command compares the estimation performance (relative estimation error; relative-ee) of the OPE estimators using the digits dataset. 
54 | 55 | ```bash 56 | python evaluate_off_policy_estimators.py\ 57 | --n_runs 30\ 58 | --dataset_name digits\ 59 | --eval_size 0.7\ 60 | --base_model_for_behavior_policy logistic_regression\ 61 | --alpha_b 0.4\ 62 | --base_model_for_evaluation_policy random_forest\ 63 | --alpha_e 0.9\ 64 | --base_model_for_reg_model lightgbm\ 65 | --n_jobs -1\ 66 | --random_state 12345 67 | 68 | # relative-ee of OPE estimators and their standard deviations (lower is better). 69 | # ============================================= 70 | # random_state=12345 71 | # --------------------------------------------- 72 | # mean std 73 | # dm 0.436541 0.017629 74 | # ipw 0.030288 0.024506 75 | # snipw 0.022764 0.017917 76 | # dr 0.016156 0.012679 77 | # sndr 0.022082 0.016865 78 | # switch-dr 0.034657 0.018575 79 | # dr-os 0.015868 0.012537 80 | # ============================================= 81 | ``` 82 | 83 | The above result can change with different situations. You can try the evaluation of OPE with other experimental settings easily. 84 | 85 | 86 | ## References 87 | 88 | - Yi Su, Pavithra Srinath, Akshay Krishnamurthy. [Adaptive Estimator Selection for Off-Policy Evaluation](https://arxiv.org/abs/2002.07729), ICML2020. 89 | - Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, Miroslav Dudík. [Doubly Robust Off-policy Evaluation with Shrinkage](https://arxiv.org/abs/1907.09623), ICML2020. 90 | - George Tucker and Jonathan Lee. [Improved Estimator Selection for Off-Policy Evaluation](https://lyang36.github.io/icml2021_rltheory/camera_ready/79.pdf), Workshop on Reinforcement Learning 91 | Theory at ICML2021. 92 | - Yu-Xiang Wang, Alekh Agarwal, Miroslav Dudik. [Optimal and Adaptive Off-policy Evaluation in Contextual Bandits](https://arxiv.org/abs/1612.01205), ICML2017. 93 | - Miroslav Dudik, John Langford, Lihong Li. [Doubly Robust Policy Evaluation and Learning](https://arxiv.org/abs/1103.4601). ICML2011. 94 | - Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita. [Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation](https://arxiv.org/abs/2008.07146). NeurIPS2021 Track on Datasets and Benchmarks. 95 | 96 | -------------------------------------------------------------------------------- /examples/multiclass/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 30 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 100 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 30 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /examples/obd/README.md: -------------------------------------------------------------------------------- 1 | # Example Experiment with Open Bandit Dataset 2 | 3 | ## Description 4 | 5 | We use Open Bandit Dataset to implement the evaluation of OPE. Specifically, we evaluate the estimation performance of some well-known OPE estimators using the on-policy policy value of an evaluation policy, which is calculable with the dataset. 
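
To make the target of the evaluation concrete: the on-policy policy value of an evaluation policy is simply the mean observed reward in the log collected by that policy itself. The sketch below illustrates this calculation; it is not part of the example script, and it assumes the small-sized Open Bandit Dataset bundled with the repository/package is accessible (the loader's `data_path` argument can be used if the data lives elsewhere).

```python
from obp.dataset import OpenBanditDataset

# log data collected by Bernoulli TS (the evaluation policy in this example)
bandit_feedback_bts = OpenBanditDataset(
    behavior_policy="bts", campaign="all"
).obtain_batch_bandit_feedback()

# on-policy ("ground-truth") policy value of Bernoulli TS:
# the empirical mean of the rewards observed under that policy
ground_truth = bandit_feedback_bts["reward"].mean()
print(ground_truth)
```

The estimators listed below only see the log collected by the *behavior* policy; their estimates are then compared against this on-policy value.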
6 | 7 | ## Evaluating Off-Policy Estimators 8 | 9 | In the following, we evaluate the estimation performance of 10 | 11 | - Direct Method (DM) 12 | - Inverse Probability Weighting (IPW) 13 | - Self-Normalized Inverse Probability Weighting (SNIPW) 14 | - Doubly Robust (DR) 15 | - Self-Normalized Doubly Robust (SNDR) 16 | - Switch Doubly Robust (Switch-DR) 17 | - Doubly Robust with Optimistic Shrinkage (DRos) 18 | 19 | For Switch-DR and DRos, we tune the built-in hyperparameters using SLOPE, a data-driven hyperparameter tuning method for OPE estimators. 20 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators. 21 | 22 | ### Files 23 | - [`./evaluate_off_policy_estimators.py`](./evaluate_off_policy_estimators.py) implements the evaluation of OPE estimators using Open Bandit Dataset. 24 | - [`./conf/hyperparams.yaml`](./conf/hyperparams.yaml) defines hyperparameters of some ML models used as the regression model in model dependent estimators (such as DM and DR). 25 | 26 | ### Scripts 27 | 28 | ```bash 29 | # run evaluation of OPE estimators with (small size) Open Bandit Dataset 30 | python evaluate_off_policy_estimators.py\ 31 | --n_runs $n_runs\ 32 | --base_model $base_model\ 33 | --evaluation_policy $evaluation_policy\ 34 | --behavior_policy $behavior_policy\ 35 | --campaign $campaign\ 36 | --n_sim_to_compute_action_dist $n_sim_to_compute_action_dist\ 37 | --n_jobs $n_jobs\ 38 | --random_state $random_state 39 | ``` 40 | - `$n_runs` specifies the number of bootstrap samples used to estimate the means and standard deviations of the performance of OPE estimators (i.e., relative estimation error). 41 | - `$base_model` specifies the base ML model for estimating the reward function, and should be one of `logistic_regression`, `random_forest`, or `lightgbm`. 42 | - `$evaluation_policy` and `$behavior_policy` specify the evaluation and behavior policies, respectively. 43 | They should be either 'bts' or 'random'. 44 | - `$campaign` specifies the campaign and should be one of 'all', 'men', or 'women'. 45 | - `$n_sim_to_compute_action_dist` is the number of Monte Carlo simulations used to compute the action distribution of a given evaluation policy. 46 | - `$n_jobs` is the maximum number of concurrently running jobs. 47 | 48 | For example, the following command compares the estimation performance of the OPE estimators listed above, using Bernoulli TS as the evaluation policy and Random as the behavior policy in the "All" campaign. 49 | 50 | ```bash 51 | python evaluate_off_policy_estimators.py\ 52 | --n_runs 30\ 53 | --base_model logistic_regression\ 54 | --evaluation_policy bts\ 55 | --behavior_policy random\ 56 | --campaign all\ 57 | --n_jobs -1 58 | 59 | # relative estimation errors of OPE estimators and their standard deviations. 60 | # ============================== 61 | # random_state=12345 62 | # ------------------------------ 63 | # mean std 64 | # dm 0.156876 0.109898 65 | # ipw 0.311082 0.311170 66 | # snipw 0.311795 0.334736 67 | # dr 0.292464 0.315485 68 | # sndr 0.302407 0.328434 69 | # switch-dr 0.258410 0.160598 70 | # dr-os 0.159520 0.109660 71 | # ============================== 72 | ``` 73 | 74 | Please refer to [this page](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html) for the evaluation of OPE protocol using our real-world data. Please visit [synthetic](../synthetic/) to try the evaluation of OPE estimators with synthetic bandit data. 
Moreover, in [benchmark/ope](https://github.com/st-tech/zr-obp/tree/master/benchmark/ope), we performed the benchmark experiments on several OPE estimators using the full size Open Bandit Dataset. 75 | 76 | 77 | 78 | ## References 79 | 80 | - Yi Su, Pavithra Srinath, Akshay Krishnamurthy. [Adaptive Estimator Selection for Off-Policy Evaluation](https://arxiv.org/abs/2002.07729), ICML2020. 81 | - Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, Miroslav Dudík. [Doubly Robust Off-policy Evaluation with Shrinkage](https://arxiv.org/abs/1907.09623), ICML2020. 82 | - George Tucker and Jonathan Lee. [Improved Estimator Selection for Off-Policy Evaluation](https://lyang36.github.io/icml2021_rltheory/camera_ready/79.pdf), Workshop on Reinforcement Learning 83 | Theory at ICML2021. 84 | - Yu-Xiang Wang, Alekh Agarwal, Miroslav Dudik. [Optimal and Adaptive Off-policy Evaluation in Contextual Bandits](https://arxiv.org/abs/1612.01205), ICML2017. 85 | - Miroslav Dudik, John Langford, Lihong Li. [Doubly Robust Policy Evaluation and Learning](https://arxiv.org/abs/1103.4601). ICML2011. 86 | - Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita. [Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation](https://arxiv.org/abs/2008.07146). NeurIPS2021 Track on Datasets and Benchmarks. 87 | 88 | -------------------------------------------------------------------------------- /examples/obd/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 30 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 100 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 30 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /examples/opl/README.md: -------------------------------------------------------------------------------- 1 | # Example with Off-Policy Policy Learners 2 | 3 | 4 | ## Description 5 | 6 | We use synthetic bandit data to evaluate some off-policy learners using their ground-truth policy value calculable with synthetic data. 7 | 8 | ## Evaluating Off-Policy Learners 9 | 10 | In the following, we evaluate the performances of 11 | 12 | - Uniform Random Policy (`Random`) 13 | - Inverse Probability Weighting Policy Learner (`IPWLearner`) 14 | - Policy Learner using Neural Networks (`NNPolicyLearner`) 15 | 16 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/_autosummary/obp.policy.offline.html) for the details about `IPWLearner` and `NNPolicyLearner`. 17 | 18 | `NNPolicyLearner` can use the following OPE estimators as the objective function: 19 | - Direct Method (DM) 20 | - Inverse Probability Weighting (IPW) 21 | - Doubly Robust (DR) 22 | 23 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators. 24 | 25 | ### Files 26 | - [`./evaluate_off_policy_learners.py`](./evaluate_off_policy_learners.py) implements the evaluation of off-policy learners using synthetic bandit data. 27 | - [`./conf/hyperparams.yaml`](./conf/hyperparams.yaml) defines hyperparameters of some ML methods used to define regression model and IPWLearner. 
28 | 29 | ### Scripts 30 | 31 | ```bash 32 | # run evaluation of off-policy learners with synthetic bandit data 33 | python evaluate_off_policy_learners.py\ 34 | --n_rounds $n_rounds\ 35 | --n_actions $n_actions\ 36 | --dim_context $dim_context\ 37 | --beta $beta\ 38 | --base_model_for_ipw_learner $base_model_for_ipw_learner\ 39 | --base_model_for_reg_model $base_model_for_reg_model\ 40 | --off_policy_objective $off_policy_objective\ 41 | --n_hidden $n_hidden\ 42 | --n_layers $n_layers\ 43 | --activation $activation\ 44 | --solver $solver\ 45 | --batch_size $batch_size\ 46 | --early_stopping\ 47 | --random_state $random_state 48 | ``` 49 | - `$n_rounds` and `$n_actions` specify the sample size and the number of actions of the synthetic bandit data, respectively. 50 | - `$dim_context` specifies the dimension of context vectors. 51 | - `$beta` specifies the inverse temperature parameter to control the behavior policy. 52 | - `$base_model_for_ipw_learner` specifies the base ML model used in IPWLearner and should be one of "logistic_regression", "random_forest", or "lightgbm". 53 | - `$off_policy_objective` specifies the OPE estimator used as the objective of NNPolicyLearner and should be one of "dm", "ipw", or "dr". 54 | - `$n_hidden` specifies the size of hidden layers in NNPolicyLearner. 55 | - `$n_layers` specifies the number of hidden layers in NNPolicyLearner. 56 | - `$activation` specifies the activation function for NNPolicyLearner and should be one of "identity", "tanh", "logistic", or "relu". 57 | - `$solver` specifies the optimizer for NNPolicyLearner and should be one of "adagrad", "sgd", or "adam". 58 | - `$batch_size` specifies the batch size for NNPolicyLearner. 59 | - `$early_stopping` enables early stopping of training of NNPolicyLearner. 60 | 61 | For example, the following command compares the performance of the off-policy learners using synthetic bandit data with 10,000 rounds, 10 actions, and five-dimensional context vectors. 62 | 63 | ```bash 64 | python evaluate_off_policy_learners.py\ 65 | --n_rounds 10000\ 66 | --n_actions 10\ 67 | --dim_context 5\ 68 | --base_model_for_ipw_learner logistic_regression\ 69 | --off_policy_objective ipw\ 70 | --n_hidden 100\ 71 | --n_layers 1\ 72 | --activation relu\ 73 | --solver adam\ 74 | --batch_size 200\ 75 | --early_stopping 76 | 77 | # policy values of off-policy learners (higher means better) 78 | # ============================================= 79 | # random_state=12345 80 | # --------------------------------------------- 81 | # policy value 82 | # random_policy 0.499925 83 | # ipw_learner 0.782430 84 | # nn_policy_learner (with ipw) 0.735947 85 | # ============================================= 86 | ``` 87 | 88 | The above result can change with different situations. You can try the evaluation with other experimental settings easily. 
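
As a complement to the command-line script, the following is a rough sketch of how the three policies compared above might be constructed and trained directly with Open Bandit Pipeline. It is only an illustration under assumed, deliberately small hyperparameters, not a reproduction of `evaluate_off_policy_learners.py`, and the exact arguments accepted by `NNPolicyLearner` may differ slightly across obp versions.

```python
from sklearn.linear_model import LogisticRegression

from obp.dataset import SyntheticBanditDataset, logistic_reward_function
from obp.policy import IPWLearner, NNPolicyLearner, Random

# synthetic logged bandit data
dataset = SyntheticBanditDataset(
    n_actions=10,
    dim_context=5,
    reward_function=logistic_reward_function,
    random_state=12345,
)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=1000)

# (1) uniform random baseline
random_policy = Random(n_actions=dataset.n_actions, random_state=12345)

# (2) IPWLearner with a logistic regression base classifier
ipw_learner = IPWLearner(
    n_actions=dataset.n_actions,
    base_classifier=LogisticRegression(C=100, random_state=12345),
)
ipw_learner.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)

# (3) NNPolicyLearner trained with the IPW objective
nn_policy_learner = NNPolicyLearner(
    n_actions=dataset.n_actions,
    dim_context=5,
    off_policy_objective="ipw",
    random_state=12345,
)
nn_policy_learner.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)

# action choices of the learned policies on the logged contexts
action_dist_ipw = ipw_learner.predict(context=bandit_feedback["context"])
action_dist_nn = nn_policy_learner.predict_proba(context=bandit_feedback["context"])
```

The policy values reported in the table above are then obtained by evaluating such action distributions against the ground truth available in the synthetic setting.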
89 | 90 | -------------------------------------------------------------------------------- /examples/opl/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 30 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 100 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 30 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /examples/quickstart/README.md: -------------------------------------------------------------------------------- 1 | # Open Bandit Pipeline Quickstart Notebooks 2 | 3 | This page contains a list of quickstart notebooks written with Open Bandit Pipeline. 4 | 5 | - [`obd.ipynb`](./obd.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/obd.ipynb): a quickstart guide of using Open Bandit Dataset and Pipeline to conduct some OPE experiments. 6 | - [`synthetic.ipynb`](./synthetic.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/synthetic.ipynb): a quickstart guide to implement the standard off-policy learning, OPE, and the evaluation of OPE on synthetic bandit data with Open Bandit Pipeline. 7 | - [`multiclass.ipynb`](./multiclass.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/multiclass.ipynb): a quickstart guide to handle multi-class classification data as logged bandit data for the standard off-policy learning, OPE, and the evaluation of OPE with Open Bandit Pipeline. 8 | - [`online.ipynb`](./replay.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/online.ipynb): a quickstart guide to implement OPE and the evaluation of OPE for online bandit algorithms with Open Bandit Pipeline. 9 | - [`opl.ipynb`](./opl.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/opl.ipynb): a quickstart guide to implement off-policy learners and the evaluation of off-policy learners with Open Bandit Pipeline. 10 | - [`synthetic_slate.ipynb`](./synthetic_slate.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/synthetic_slate.ipynb): a quickstart guide to implement OPE and the evaluation of OPE for the slate recommendation setting with Open Bandit Pipeline. 11 | -------------------------------------------------------------------------------- /examples/replay/README.md: -------------------------------------------------------------------------------- 1 | # Replay Example with Online Bandit Algorithms 2 | 3 | 4 | ## Description 5 | 6 | We use synthetic bandit datasets to evaluate OPE of online bandit algorithms. 7 | Specifically, we evaluate the estimation performance of some well-known OPE estimators using the ground-truth policy value of an evaluation policy calculable with synthetic data. 
8 | 9 | 10 | ## Evaluating Off-Policy Estimators 11 | 12 | In the following, we evaluate the estimation performance of the Replay Method (RM). 13 | RM uses a subset of the logged bandit feedback data where actions selected by the behavior policy are the same as those of the evaluation policy. 14 | Theoretically, RM is unbiased when the behavior policy is uniformly random and the evaluation policy is fixed. 15 | However, empirically, RM works well when evaluation policies are learning algorithms. 16 | Please refer to https://arxiv.org/abs/1003.5956 for the details of RM. 17 | 18 | 19 | ### Files 20 | - [`./evaluate_off_policy_estimators.py`](./evaluate_off_policy_estimators.py) implements the evaluation of OPE estimators by RM using synthetic bandit data. 21 | 22 | ### Scripts 23 | 24 | ```bash 25 | # run evaluation of OPE estimators with synthetic bandit data 26 | python evaluate_off_policy_estimators.py\ 27 | --n_runs $n_runs\ 28 | --n_rounds $n_rounds\ 29 | --n_actions $n_actions\ 30 | --n_sim $n_sim\ 31 | --dim_context $dim_context\ 32 | --n_jobs $n_jobs\ 33 | --random_state $random_state 34 | ``` 35 | - `$n_runs` specifies the number of simulation runs in the experiment to estimate standard deviations of the performance of OPE estimators. 36 | - `$n_rounds` and `$n_actions` specify the sample size and the number of actions of the synthetic bandit data. 37 | - `$dim_context` specifies the dimension of context vectors. 38 | - `$n_sim` specifies the number of Monte Carlo simulations used to compute the ground-truth policy value. 39 | - `$evaluation_policy_name` specifies the evaluation policy and should be one of "bernoulli_ts", "epsilon_greedy", "lin_epsilon_greedy", "lin_ts", "lin_ucb", "logistic_epsilon_greedy", "logistic_ts", or "logistic_ucb". 40 | - `$n_jobs` is the maximum number of concurrently running jobs. 41 | 42 | For example, the following command compares the estimation performance (relative estimation error; relative-ee) of RM using synthetic bandit data with 1,000 rounds, 30 actions, and five-dimensional context vectors. 43 | 44 | ```bash 45 | python evaluate_off_policy_estimators.py\ 46 | --n_runs 20\ 47 | --n_rounds 1000\ 48 | --n_actions 30\ 49 | --dim_context 5\ 50 | --evaluation_policy_name bernoulli_ts\ 51 | --n_sim 3\ 52 | --n_jobs -1\ 53 | --random_state 12345 54 | 55 | # relative-ee of OPE estimators and their standard deviations (lower means more accurate). 56 | # ============================================= 57 | # random_state=12345 58 | # --------------------------------------------- 59 | # mean std 60 | # rm 0.097064 0.091453 61 | # ============================================= 62 | ``` 63 | 64 | The above result can change with different situations. 65 | You can try the evaluation of OPE with other experimental settings easily. 
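
For intuition, the matching idea behind RM described above can be sketched in a few lines of NumPy: keep only the rounds in which the logged action coincides with the action the evaluation policy would have chosen, and average their rewards. This toy snippet uses made-up data and a deterministic evaluation policy purely to illustrate the idea; the actual estimator is `obp.ope.ReplayMethod`, which is what the example script uses.

```python
import numpy as np

def replay_estimate(action, reward, action_by_eval_policy):
    """Mean reward over the rounds where the logged action matches
    the (deterministic) evaluation policy's action."""
    match = action == action_by_eval_policy
    if not match.any():
        return 0.0  # no matched rounds: the estimate is not informative
    return float(reward[match].mean())

rng = np.random.default_rng(12345)
n_rounds, n_actions = 10, 3
action = rng.integers(n_actions, size=n_rounds)                 # logged actions (uniformly random behavior policy)
reward = rng.binomial(1, 0.3, size=n_rounds)                    # logged binary rewards
action_by_eval_policy = rng.integers(n_actions, size=n_rounds)  # actions the evaluation policy would choose

print(replay_estimate(action, reward, action_by_eval_policy))
```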
66 | -------------------------------------------------------------------------------- /examples/replay/evaluate_off_policy_estimators.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | from joblib import delayed 5 | from joblib import Parallel 6 | import numpy as np 7 | from pandas import DataFrame 8 | 9 | from obp.dataset import logistic_reward_function 10 | from obp.dataset import SyntheticBanditDataset 11 | from obp.ope import OffPolicyEvaluation 12 | from obp.ope import ReplayMethod 13 | from obp.policy import BernoulliTS 14 | from obp.policy import EpsilonGreedy 15 | from obp.policy import LinEpsilonGreedy 16 | from obp.policy import LinTS 17 | from obp.policy import LinUCB 18 | from obp.policy import LogisticEpsilonGreedy 19 | from obp.policy import LogisticTS 20 | from obp.policy import LogisticUCB 21 | from obp.simulator import calc_ground_truth_policy_value 22 | from obp.utils import run_bandit_replay 23 | 24 | ope_estimators = [ReplayMethod()] 25 | 26 | if __name__ == "__main__": 27 | parser = argparse.ArgumentParser( 28 | description="evaluate off-policy estimators with replay bandit algorithms and synthetic bandit data." 29 | ) 30 | parser.add_argument( 31 | "--n_runs", type=int, default=1, help="number of simulations in the experiment." 32 | ) 33 | parser.add_argument( 34 | "--n_rounds", 35 | type=int, 36 | default=10000, 37 | help="sample size of logged bandit data.", 38 | ) 39 | parser.add_argument( 40 | "--n_actions", 41 | type=int, 42 | default=10, 43 | help="number of actions.", 44 | ) 45 | parser.add_argument( 46 | "--dim_context", 47 | type=int, 48 | default=5, 49 | help="dimensions of context vectors.", 50 | ) 51 | parser.add_argument( 52 | "--n_sim", 53 | type=int, 54 | default=1, 55 | help="number of simulations to calculate ground truth policy values", 56 | ) 57 | parser.add_argument( 58 | "--evaluation_policy_name", 59 | type=str, 60 | choices=[ 61 | "bernoulli_ts", 62 | "epsilon_greedy", 63 | "lin_epsilon_greedy", 64 | "lin_ts", 65 | "lin_ucb", 66 | "logistic_epsilon_greedy", 67 | "logistic_ts", 68 | "logistic_ucb", 69 | ], 70 | required=True, 71 | help="the name of evaluation policy, bernoulli_ts, epsilon_greedy, lin_epsilon_greedy, lin_ts, lin_ucb, logistic_epsilon_greedy, logistic_ts, or logistic_ucb", 72 | ) 73 | parser.add_argument( 74 | "--n_jobs", 75 | type=int, 76 | default=1, 77 | help="the maximum number of concurrently running jobs.", 78 | ) 79 | parser.add_argument("--random_state", type=int, default=12345) 80 | args = parser.parse_args() 81 | print(args) 82 | 83 | # configurations 84 | n_runs = args.n_runs 85 | n_rounds = args.n_rounds 86 | n_actions = args.n_actions 87 | dim_context = args.dim_context 88 | n_sim = args.n_sim 89 | evaluation_policy_name = args.evaluation_policy_name 90 | n_jobs = args.n_jobs 91 | random_state = args.random_state 92 | np.random.seed(random_state) 93 | 94 | # define evaluation policy 95 | evaluation_policy_dict = dict( 96 | bernoulli_ts=BernoulliTS(n_actions=n_actions, random_state=random_state), 97 | epsilon_greedy=EpsilonGreedy( 98 | n_actions=n_actions, epsilon=0.1, random_state=random_state 99 | ), 100 | lin_epsilon_greedy=LinEpsilonGreedy( 101 | dim=dim_context, n_actions=n_actions, epsilon=0.1, random_state=random_state 102 | ), 103 | lin_ts=LinTS(dim=dim_context, n_actions=n_actions, random_state=random_state), 104 | lin_ucb=LinUCB(dim=dim_context, n_actions=n_actions, random_state=random_state), 105 | 
logistic_epsilon_greedy=LogisticEpsilonGreedy( 106 | dim=dim_context, n_actions=n_actions, epsilon=0.1, random_state=random_state 107 | ), 108 | logistic_ts=LogisticTS( 109 | dim=dim_context, n_actions=n_actions, random_state=random_state 110 | ), 111 | logistic_ucb=LogisticUCB( 112 | dim=dim_context, n_actions=n_actions, random_state=random_state 113 | ), 114 | ) 115 | evaluation_policy = evaluation_policy_dict[evaluation_policy_name] 116 | 117 | def process(i: int): 118 | # synthetic data generator with uniformly random policy 119 | dataset = SyntheticBanditDataset( 120 | n_actions=n_actions, 121 | dim_context=dim_context, 122 | reward_function=logistic_reward_function, 123 | behavior_policy_function=None, # uniformly random 124 | random_state=i, 125 | ) 126 | # sample new data of synthetic logged bandit feedback 127 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 128 | # simulate the evaluation policy 129 | action_dist = run_bandit_replay( 130 | bandit_feedback=bandit_feedback, policy=evaluation_policy 131 | ) 132 | # estimate the ground-truth policy values of the evaluation policy 133 | # by Monte-Carlo Simulation using p(r|x,a), the reward distribution 134 | ground_truth_policy_value = calc_ground_truth_policy_value( 135 | bandit_feedback=bandit_feedback, 136 | reward_sampler=dataset.sample_reward, # p(r|x,a) 137 | policy=evaluation_policy, 138 | n_sim=n_sim, # the number of simulations 139 | ) 140 | # evaluate estimators' performances using relative estimation error (relative-ee) 141 | ope = OffPolicyEvaluation( 142 | bandit_feedback=bandit_feedback, 143 | ope_estimators=ope_estimators, 144 | ) 145 | metric_i = ope.evaluate_performance_of_estimators( 146 | ground_truth_policy_value=ground_truth_policy_value, 147 | action_dist=action_dist, 148 | ) 149 | 150 | return metric_i 151 | 152 | processed = Parallel( 153 | n_jobs=n_jobs, 154 | verbose=50, 155 | )([delayed(process)(i) for i in np.arange(n_runs)]) 156 | metric_dict = {est.estimator_name: dict() for est in ope_estimators} 157 | for i, metric_i in enumerate(processed): 158 | for ( 159 | estimator_name, 160 | relative_ee_, 161 | ) in metric_i.items(): 162 | metric_dict[estimator_name][i] = relative_ee_ 163 | se_df = DataFrame(metric_dict).describe().T.round(6) 164 | 165 | print("=" * 45) 166 | print(f"random_state={random_state}") 167 | print("-" * 45) 168 | print(se_df[["mean", "std"]]) 169 | print("=" * 45) 170 | 171 | # save results of the evaluation of off-policy estimators in './logs' directory. 172 | log_path = Path("./logs") 173 | log_path.mkdir(exist_ok=True, parents=True) 174 | se_df.to_csv(log_path / "relative_ee_of_ope_estimators.csv") 175 | -------------------------------------------------------------------------------- /examples/synthetic/README.md: -------------------------------------------------------------------------------- 1 | # Example Experiment with Synthetic Bandit Data 2 | 3 | ## Description 4 | 5 | We use synthetic bandit datasets to evaluate OPE estimators. Specifically, we evaluate the estimation performance of well-known estimators using the ground-truth policy value of an evaluation policy calculable with synthetic data. 
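
Because the reward function is known in the synthetic setting, the ground-truth policy value of any evaluation policy can be computed exactly and then compared with each estimator's output. The sketch below shows where that ground truth comes from, using a hand-made uniformly random evaluation policy for brevity (the example script instead defines the evaluation policy with an ML model and evaluates all of the estimators listed below at once); argument values here are illustrative only.

```python
import numpy as np

from obp.dataset import SyntheticBanditDataset, logistic_reward_function
from obp.ope import InverseProbabilityWeighting as IPW, OffPolicyEvaluation

dataset = SyntheticBanditDataset(
    n_actions=10,
    dim_context=5,
    beta=-3,  # inverse temperature of the behavior policy
    reward_function=logistic_reward_function,
    random_state=12345,
)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)

# a uniformly random evaluation policy, shape (n_rounds, n_actions, len_list=1)
action_dist = np.full(
    (bandit_feedback["n_rounds"], dataset.n_actions, 1), 1.0 / dataset.n_actions
)

# ground-truth value of the evaluation policy, from the known expected rewards q(x, a)
ground_truth = dataset.calc_ground_truth_policy_value(
    expected_reward=bandit_feedback["expected_reward"], action_dist=action_dist
)

# OPE estimate of the same quantity, computed from the logged data only
ope = OffPolicyEvaluation(bandit_feedback=bandit_feedback, ope_estimators=[IPW()])
estimated = ope.estimate_policy_values(action_dist=action_dist)["ipw"]

# relative estimation error (relative-ee), the metric reported in this example
relative_ee = abs(estimated - ground_truth) / ground_truth
print(relative_ee)
```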
6 | 7 | ## Evaluating Off-Policy Estimators 8 | 9 | In the following, we evaluate the estimation performance of 10 | 11 | - Direct Method (DM) 12 | - Inverse Probability Weighting (IPW) 13 | - Self-Normalized Inverse Probability Weighting (SNIPW) 14 | - Doubly Robust (DR) 15 | - Self-Normalized Doubly Robust (SNDR) 16 | - Switch Doubly Robust (Switch-DR) 17 | - Doubly Robust with Optimistic Shrinkage (DRos) 18 | 19 | For Switch-DR and DRos, we tune the built-in hyperparameters using SLOPE, a data-driven hyperparameter tuning method for OPE estimators. 20 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators. 21 | 22 | ### Files 23 | - [`./evaluate_off_policy_estimators.py`](./evaluate_off_policy_estimators.py) implements the evaluation of OPE estimators using synthetic bandit data. 24 | - [`./conf/hyperparams.yaml`](./conf/hyperparams.yaml) defines hyperparameters of some ML methods used to define regression model and IPWLearner. 25 | 26 | ### Scripts 27 | 28 | ```bash 29 | # run evaluation of OPE estimators with synthetic bandit data 30 | python evaluate_off_policy_estimators.py\ 31 | --n_runs $n_runs\ 32 | --n_rounds $n_rounds\ 33 | --n_actions $n_actions\ 34 | --dim_context $dim_context\ 35 | --beta $beta\ 36 | --base_model_for_evaluation_policy $base_model_for_evaluation_policy\ 37 | --base_model_for_reg_model $base_model_for_reg_model\ 38 | --n_jobs $n_jobs\ 39 | --random_state $random_state 40 | ``` 41 | - `$n_runs` specifies the number of simulation runs in the experiment to estimate standard deviations of the performance of OPE estimators. 42 | - `$n_rounds` and `$n_actions` specify the sample size and the number of actions of the synthetic bandit data, respectively. 43 | - `$dim_context` specifies the dimension of context vectors. 44 | - `$beta` specifies the inverse temperature parameter to control the behavior policy. 45 | - `$base_model_for_evaluation_policy` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm". 46 | - `$base_model_for_reg_model` specifies the base ML model for defining regression model and should be one of "logistic_regression", "random_forest", or "lightgbm". 47 | - `$n_jobs` is the maximum number of concurrently running jobs. 48 | 49 | For example, the following command compares the estimation performance (relative estimation error; relative-ee) of the OPE estimators using synthetic bandit data with 10,000 samples, 30 actions, five dimensional context vectors. 50 | 51 | ```bash 52 | python evaluate_off_policy_estimators.py\ 53 | --n_runs 20\ 54 | --n_rounds 10000\ 55 | --n_actions 30\ 56 | --dim_context 5\ 57 | --beta -3\ 58 | --base_model_for_evaluation_policy logistic_regression\ 59 | --base_model_for_reg_model logistic_regression\ 60 | --n_jobs -1\ 61 | --random_state 12345 62 | 63 | # relative-ee of OPE estimators and their standard deviations (lower means accurate). 64 | # ============================================= 65 | # random_state=12345 66 | # --------------------------------------------- 67 | # mean std 68 | # dm 0.074390 0.024525 69 | # ipw 0.009481 0.006899 70 | # snipw 0.006665 0.004541 71 | # dr 0.006175 0.004245 72 | # sndr 0.006118 0.003997 73 | # switch-dr 0.006175 0.004245 74 | # dr-os 0.021951 0.013337 75 | # ============================================= 76 | ``` 77 | 78 | The above result can change with different situations. 
You can try the evaluation of OPE with other experimental settings easily. 79 | 80 | ## References 81 | 82 | - Yi Su, Pavithra Srinath, Akshay Krishnamurthy. [Adaptive Estimator Selection for Off-Policy Evaluation](https://arxiv.org/abs/2002.07729), ICML2020. 83 | - Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, Miroslav Dudík. [Doubly Robust Off-policy Evaluation with Shrinkage](https://arxiv.org/abs/1907.09623), ICML2020. 84 | - George Tucker and Jonathan Lee. [Improved Estimator Selection for Off-Policy Evaluation](https://lyang36.github.io/icml2021_rltheory/camera_ready/79.pdf), Workshop on Reinforcement Learning 85 | Theory at ICML2021. 86 | - Yu-Xiang Wang, Alekh Agarwal, Miroslav Dudik. [Optimal and Adaptive Off-policy Evaluation in Contextual Bandits](https://arxiv.org/abs/1612.01205), ICML2017. 87 | - Miroslav Dudik, John Langford, Lihong Li. [Doubly Robust Policy Evaluation and Learning](https://arxiv.org/abs/1103.4601). ICML2011. 88 | - Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita. [Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation](https://arxiv.org/abs/2008.07146). NeurIPS2021 Track on Datasets and Benchmarks. 89 | 90 | -------------------------------------------------------------------------------- /examples/synthetic/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 30 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 100 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 30 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /images/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/dataset.png -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/logo.png -------------------------------------------------------------------------------- /images/obd_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/obd_stats.png -------------------------------------------------------------------------------- /images/ope_results_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/ope_results_example.png -------------------------------------------------------------------------------- /images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/overview.png -------------------------------------------------------------------------------- /images/recommended_fashion_items.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/recommended_fashion_items.png 
-------------------------------------------------------------------------------- /obd/README.md: -------------------------------------------------------------------------------- 1 | # Open Bandit Dataset 2 | 3 | This directory contains the small size (10,000 records for each pair of campaign and behavior policy) version of our data that can be used for running our [quickstart guide](https://github.com/st-tech/zr-obp/blob/master/examples/quickstart/obd.ipynb) and [examples](https://github.com/st-tech/zr-obp/tree/master/examples/obd). 4 | The full size version of our data is available at [https://research.zozo.com/data.html](https://research.zozo.com/data.html). 5 | 6 | 7 | This dataset is released along with the paper: 8 | 9 | Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita.
10 | **Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation**
11 | [https://arxiv.org/abs/2008.07146](https://arxiv.org/abs/2008.07146) 12 | 13 | When using this dataset, please cite the paper with following bibtex: 14 | ``` 15 | @article{saito2020open, 16 | title={Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation}, 17 | author={Saito, Yuta and Shunsuke, Aihara and Megumi, Matsutani and Yusuke, Narita}, 18 | journal={arXiv preprint arXiv:2008.07146}, 19 | year={2020} 20 | } 21 | ``` 22 | 23 | ## Data description 24 | Open Bandit Dataset is constructed in an A/B test of two multi-armed bandit policies on a large-scale fashion e-commerce platform, [ZOZOTOWN](https://zozo.jp/). 25 | It currently consists of a total of about 26M rows, each one representing a user impression with some feature values, selected items as actions, true propensity scores, and click indicators as an outcome. 26 | This is especially suitable for evaluating *off-policy evaluation* (OPE), which aims to estimate the counterfactual performance of hypothetical algorithms using data generated by a different algorithm. 27 | 28 | 29 | ## Fields 30 | Here is a detailed description of the fields (they are comma-separated in the CSV files): 31 | 32 | **{behavior_policy}/{campaign}.csv** (behavior_policy in (bts, random), campaign in (all, men, women)) 33 | - timestamp: timestamps of impressions. 34 | - item_id: index of items as arms (index ranges from 0-79 in "All" campaign, 0-33 for "Men" campaign, and 0-45 "Women" campaign). 35 | - position: the position of an item being recommended (1, 2, or 3 correspond to left, center, and right position of the ZOZOTOWN recommendation interface, respectively). 36 | - click: target variable that indicates if an item was clicked (1) or not (0). 37 | - action_prob: the probability of an item being recommended at the given position. 38 | - user_features: user-related feature values. 39 | - user_item_affinity: user-item affinity scores induced by the number of past clicks observed between each user-item pair. 40 | 41 |
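
The raw CSVs can also be inspected directly, without going through the pipeline's data loader. A rough sketch with pandas (run from the repository root so the relative path resolves; the column names follow the field description above):

```python
import pandas as pd

# small-sized log collected by the Random policy in the "All" campaign
df = pd.read_csv("obd/random/all/all.csv")
print(df.shape)             # (number of logged impressions, number of columns)
print(df.columns.tolist())  # timestamp, item_id, position, click, action_prob, features, ...
print(df["click"].mean())   # overall click rate under the Random policy
```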

42 | *(figure: Structure of Open Bandit Dataset)* 43 | 44 | 45 | 46 | 47 | 48 |
49 | 50 | **item_context.csv** 51 | - item_id: index of items as arms (index ranges from 0-80 in "All" campaign, 0-33 for "Men" campaign, and 0-46 "Women" campaign). 52 | - item feature 0-3: item related feature values 53 | 54 | 55 | Note that user and item features are now anonymized using a hash function. 56 | 57 | ## Contact 58 | For any question, feel free to contact: 59 | 60 | - The authors of the paper: saito@hanjuku-kaso.com 61 | - ZOZO Research: zozo-research@zozo.com 62 | -------------------------------------------------------------------------------- /obd/README_JN.md: -------------------------------------------------------------------------------- 1 | # Open Bandit Dataset 2 | 3 | このディレクトリには, [実装例](https://github.com/st-tech/zr-obp/tree/master/examples)を実行するための少量(キャンペーンと行動ポリシーのペアごとに10,000レコード)のデータが含まれています. フルサイズ版のデータは[https://research.zozo.com/data.html](https://research.zozo.com/data.html)にて公開されています. 4 | 5 | この公開データセットに関する詳細な記述は以下の論文を参照してください: 6 | 7 | Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita.
8 | **Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation**
9 | [https://arxiv.org/abs/2008.07146](https://arxiv.org/abs/2008.07146)
10 |
11 | ## データセットの概要
12 | Open Bandit Datasetは, 大規模ファッションECサイト[ZOZOTOWN](https://zozo.jp/)において, 2つの多腕バンディット方策のA/Bテストによって構築されたものです. 現在のログデータ数は合計2600万以上であり, それぞれのデータは特徴量・方策によって選択されたファッションアイテム・真の傾向スコア・クリック有無ラベルによって構成されます. このデータセットは, 別のアルゴリズムによって生成されたデータを用いて反実仮想アルゴリズムの性能を予測するオフ方策評価 (off-policy evaluation)の性能を評価するのに特に適しています.
13 |
14 |
15 | ## 構成
16 | データセットの構成要素の詳細は以下の通りです.
17 |
18 | **{behavior_policy}/{campaign}.csv** (behavior_policy in (bts, random), campaign in (all, men, women))
19 | - timestamp: インプレッションのタイムスタンプ.
20 | - item_id: アイテムのインデックス(インデックスの範囲は「すべて」キャンペーンでは0~79, 「男性」キャンペーンでは0~33, 「女性」キャンペーンでは0~45).
21 | - position: 推薦されるアイテムの位置(1, 2, 3はそれぞれ[ZOZOTOWNの推薦インターフェース](../images/recommended_fashion_items.png)の左, 中央, 右の位置に対応).
22 | - click: アイテムがクリックされたか(1), されなかったか(0)を示す2値目的変数.
23 | - action_prob: 与えられたpositionにアイテムが推薦された際に計算された推薦確率 (傾向スコア).
24 | - user_features: ユーザーに関連する特徴量. 匿名化の目的でハッシュ化されている.
25 | - user_item_affinity: それぞれのユーザとアイテムのペア間で観測された過去のクリック数に応じた関連度特徴量.
26 |
27 | **item_context.csv**
28 | - item_id: アイテムのインデックス(インデックスの範囲は, 「すべて」キャンペーンでは0~79, 「男性」キャンペーンでは0~33, 「女性」キャンペーンでは0~45).
29 | - item feature 0-3: アイテムに関連する特徴量.
30 |
31 |

32 | *図: Open Bandit Datasetの構成*
39 | 40 | なお, user featureとitem featureのそれぞれが何を表すかについては、現在公表されておりません. 41 | また, それぞれのfeatureの値は, ハッシュ関数を用いて匿名化されています. 42 | 43 | ## 連絡 44 | データセットに関する質問等は, 次のメールアドレスにご連絡いただくようお願いいたします: 45 | 46 | - 論文の著者: saito@hanjuku-kaso.com 47 | - ZOZO研究所: zozo-research@zozo.com 48 | -------------------------------------------------------------------------------- /obd/bts/men/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.6771831139635117,c82d13885d8bf7a3b8b9fa6f0842ba60,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 3 | 1,1,-0.7202996418188664,77490d05a721c6d93edf580642ffd8bd,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 4 | 2,2,0.7456623052631924,77490d05a721c6d93edf580642ffd8bd,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 5 | 3,3,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,ff2de7df709624e5b79199b850382ea0,68f8b5168b2a322db725a6cd6f5c900b 6 | 4,4,1.6511093902256406,61a525de9976c0f3fa29d400caf26c56,ee987234ffe4f3d901846ac3f7417738,7a0c97ee71eb7985bd0a6271ce57cec5 7 | 5,5,0.14203091528822703,61a525de9976c0f3fa29d400caf26c56,bb7caf7f0c11f7827fb23b331777b871,8ea65bc866b36a8f00ae913e0c3acc29 8 | 6,6,1.6511093902256406,c82d13885d8bf7a3b8b9fa6f0842ba60,818dfe387422471f09a34db693a78212,7a0c97ee71eb7985bd0a6271ce57cec5 9 | 7,7,2.8583721701755715,61a525de9976c0f3fa29d400caf26c56,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 10 | 8,8,1.349293695238158,61a525de9976c0f3fa29d400caf26c56,7daaf8717f83289266063b6cc1728087,7a0c97ee71eb7985bd0a6271ce57cec5 11 | 9,9,1.1983858477444165,135f410ec21307919cd92df77f1e2a36,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 12 | 10,10,1.5864345984426087,135f410ec21307919cd92df77f1e2a36,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 13 | 11,11,0.44384661027570976,c82d13885d8bf7a3b8b9fa6f0842ba60,24ea3b3a472c51dd6299ebdfb220a55f,0c3b42b13b5a49fcb746da9f60e63717 14 | 12,12,1.1983858477444165,c82d13885d8bf7a3b8b9fa6f0842ba60,0e077f97ef2dcda0dc404f873fc5f96c,7a0c97ee71eb7985bd0a6271ce57cec5 15 | 13,13,0.6163127216971285,135f410ec21307919cd92df77f1e2a36,0e077f97ef2dcda0dc404f873fc5f96c,7a0c97ee71eb7985bd0a6271ce57cec5 16 | 14,14,-1.000557072878672,135f410ec21307919cd92df77f1e2a36,865945b5265169a2176a6e5f084ab2eb,8ea65bc866b36a8f00ae913e0c3acc29 17 | 15,15,-0.37536741897602904,c82d13885d8bf7a3b8b9fa6f0842ba60,786ff5d72b02d1e68a43508d9579977d,68f8b5168b2a322db725a6cd6f5c900b 18 | 16,16,-0.5909500582528024,c82d13885d8bf7a3b8b9fa6f0842ba60,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 19 | 17,17,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 20 | 18,18,-0.9143240171679625,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 21 | 19,19,-0.7634161696742211,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 22 | 20,20,-0.6125083221804798,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 23 | 21,21,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,ff2de7df709624e5b79199b850382ea0,68f8b5168b2a322db725a6cd6f5c900b 24 | 22,22,-0.6987413778911891,17ef71cb22e550d31e5eaa4d629c4abd,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 25 | 
23,23,-0.5693917943251251,e1b1451d555c82a01874347dbecdfeae,01b306b40a448bff555c06d5d72c0171,7a0c97ee71eb7985bd0a6271ce57cec5 26 | 24,24,0.4222883463480324,f15de9aa508214df06454736b488717c,7daaf8717f83289266063b6cc1728087,7a0c97ee71eb7985bd0a6271ce57cec5 27 | 25,25,-0.4616004746867384,135f410ec21307919cd92df77f1e2a36,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 28 | 26,26,0.8965701527569339,77490d05a721c6d93edf580642ffd8bd,746facf4548f3da6d628b8e35bf9e6ec,7a0c97ee71eb7985bd0a6271ce57cec5 29 | 27,27,-0.8496492253849305,17ef71cb22e550d31e5eaa4d629c4abd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 30 | 28,28,-1.0652318646617038,17ef71cb22e550d31e5eaa4d629c4abd,a46137fea33ac48f0809591a76630ea5,68f8b5168b2a322db725a6cd6f5c900b 31 | 29,29,-0.8496492253849305,17ef71cb22e550d31e5eaa4d629c4abd,008dc8758000efaf5b318227fcb71f8d,8ea65bc866b36a8f00ae913e0c3acc29 32 | 30,30,-0.9143240171679625,17ef71cb22e550d31e5eaa4d629c4abd,865945b5265169a2176a6e5f084ab2eb,8ea65bc866b36a8f00ae913e0c3acc29 33 | 31,31,-0.4616004746867384,e1b1451d555c82a01874347dbecdfeae,008dc8758000efaf5b318227fcb71f8d,8ea65bc866b36a8f00ae913e0c3acc29 34 | 32,32,-0.5262752664697704,f15de9aa508214df06454736b488717c,a46137fea33ac48f0809591a76630ea5,68f8b5168b2a322db725a6cd6f5c900b 35 | 33,33,-0.6125083221804798,f15de9aa508214df06454736b488717c,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 36 | -------------------------------------------------------------------------------- /obd/bts/women/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.3701057045375884,37784fea97b5827eeaf4a23dbff98b73,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 3 | 1,1,0.5251956676347125,3220392a73f0fb73e5509a3f6b89ae64,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 4 | 2,2,-0.13450008028171972,1f0bd59babc615f7876d70abd81b0703,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 5 | 3,3,-0.5350296415166964,37784fea97b5827eeaf4a23dbff98b73,30e4f82eec0c5210c403aab8007a5881,2951c610187f9e9e8281ecd31a156bd1 6 | 4,4,-0.25230289240965403,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 7 | 5,5,0.03042385669738834,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 8 | 6,6,-0.13450008028171972,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 9 | 7,7,-0.8436730092918844,54130721ea2331736ec3cd62c6ff2a0a,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 10 | 8,8,-0.8436730092918844,54130721ea2331736ec3cd62c6ff2a0a,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 11 | 9,9,-0.6080673850360158,54130721ea2331736ec3cd62c6ff2a0a,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 12 | 10,10,-0.6080673850360158,54130721ea2331736ec3cd62c6ff2a0a,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 13 | 11,11,0.36027173065560447,e88594e2095dc09c70763bd14b6bb16e,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 14 | 12,12,0.36027173065560447,e88594e2095dc09c70763bd14b6bb16e,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 15 | 13,13,2.9990547223213335,37784fea97b5827eeaf4a23dbff98b73,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 16 | 
14,14,1.3498153525302528,1f0bd59babc615f7876d70abd81b0703,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 17 | 15,15,-0.25230289240965403,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 18 | 16,16,0.6901196046138206,1f0bd59babc615f7876d70abd81b0703,40b2c280a2676cf7e83a2c19a333d4a2,7ce347fef632da56f7d0cd2e3d96c9d2 19 | 17,17,2.339358974404901,1f0bd59babc615f7876d70abd81b0703,5e32ca87b332cb657386052c2962f06f,e6dceba864edcc7bf60d38616a52a13d 20 | 18,18,-0.39366626696317525,3220392a73f0fb73e5509a3f6b89ae64,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 21 | 19,19,-0.8648775154749125,37784fea97b5827eeaf4a23dbff98b73,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 22 | 20,20,0.8550435415929286,e88594e2095dc09c70763bd14b6bb16e,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 23 | 21,21,-0.6292718912190439,1f0bd59babc615f7876d70abd81b0703,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 24 | 22,22,0.8550435415929286,e88594e2095dc09c70763bd14b6bb16e,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 25 | 23,23,-0.2994240172608278,37784fea97b5827eeaf4a23dbff98b73,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 26 | 24,24,-0.7235141409213913,3220392a73f0fb73e5509a3f6b89ae64,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 27 | 25,25,-0.95911976517726,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 28 | 26,26,-0.6292718912190439,980e8ad619a60423e616b67cfb8e09b9,a164a8f4dbd09847e25a3956e12bccff,360f242a6660cf5ee5249dc3c197fe62 29 | 27,27,-0.6292718912190439,980e8ad619a60423e616b67cfb8e09b9,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 30 | 28,28,0.3367111682300176,72f3f67e8e9907b474c547847f8d5fd3,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 31 | 29,29,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,a3438007435a63dbe0ea33f5a0d1e84a,360f242a6660cf5ee5249dc3c197fe62 32 | 30,30,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 33 | 31,31,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,a3438007435a63dbe0ea33f5a0d1e84a,360f242a6660cf5ee5249dc3c197fe62 34 | 32,32,0.17178723125090953,72f3f67e8e9907b474c547847f8d5fd3,04a71d6c9b0aa3b9e462a6923d1e8393,25e55d04edea9bd0a20aff26ac263414 35 | 33,33,-0.32298457968641464,72f3f67e8e9907b474c547847f8d5fd3,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 36 | 34,34,0.5016351052091257,72f3f67e8e9907b474c547847f8d5fd3,5e32ca87b332cb657386052c2962f06f,e6dceba864edcc7bf60d38616a52a13d 37 | 35,35,-0.5585902039422833,72f3f67e8e9907b474c547847f8d5fd3,b02dadb348cf4ac330bf1d90cb80237e,2951c610187f9e9e8281ecd31a156bd1 38 | 36,36,-0.13450008028171972,3220392a73f0fb73e5509a3f6b89ae64,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 39 | 37,37,-0.39366626696317525,f3a3cc32a3967214164eb2709555b3f7,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 40 | 38,38,-0.3701057045375884,cd7b41b498ea6d9180ad3fd389422c39,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 41 | 39,39,-0.46434795423993586,734fc1b871abffa4db3be9bc16ad80f7,b02dadb348cf4ac330bf1d90cb80237e,2951c610187f9e9e8281ecd31a156bd1 42 | 40,40,4.15352228117509,e88594e2095dc09c70763bd14b6bb16e,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 43 | 
41,41,0.03042385669738834,734fc1b871abffa4db3be9bc16ad80f7,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 44 | 42,42,-0.4879085166655227,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 45 | 43,43,-0.95911976517726,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 46 | 44,44,-0.46434795423993586,980e8ad619a60423e616b67cfb8e09b9,a164a8f4dbd09847e25a3956e12bccff,360f242a6660cf5ee5249dc3c197fe62 47 | 45,45,-0.7941958281981519,980e8ad619a60423e616b67cfb8e09b9,1b433010466b794694fc6f5f29eac0d8,360f242a6660cf5ee5249dc3c197fe62 48 | -------------------------------------------------------------------------------- /obd/random/men/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.6771831139635117,ceca20033d7d36b74dc683ddfb804aa7,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 3 | 1,1,-0.7202996418188664,270de57201b8ec18df9a72ed7ecf20eb,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 4 | 2,2,0.7456623052631924,270de57201b8ec18df9a72ed7ecf20eb,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 5 | 3,3,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,1d8ba92fbaa83078dfe330d66b81e5d6,5cc21cc265333250f10b13783ab06472 6 | 4,4,1.6511093902256406,ca9488139d82dbbf68a4e71fc7fe52f9,f65e8237cca7eb6b12f4f009a28a6f72,14fb049a96497a5deef345c1c38b2467 7 | 5,5,0.14203091528822703,ca9488139d82dbbf68a4e71fc7fe52f9,571216af60c365e6a05e1c33c7041f5f,795091554fd8f6b4a0ca7df81bf50a64 8 | 6,6,1.6511093902256406,ceca20033d7d36b74dc683ddfb804aa7,d56aaef6375c7844851af69b354331ba,14fb049a96497a5deef345c1c38b2467 9 | 7,7,2.8583721701755715,ca9488139d82dbbf68a4e71fc7fe52f9,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 10 | 8,8,1.349293695238158,ca9488139d82dbbf68a4e71fc7fe52f9,ef0257571cb05e9c0bba5446f9cfb0c9,14fb049a96497a5deef345c1c38b2467 11 | 9,9,1.1983858477444165,cb4655bc2d2e54055efefb998883d6fe,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 12 | 10,10,1.5864345984426087,cb4655bc2d2e54055efefb998883d6fe,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 13 | 11,11,0.44384661027570976,ceca20033d7d36b74dc683ddfb804aa7,b1dbb432e49fb71cc3b3e820ff31f3ad,6893a4373a4e271e7f03b7a4bdfde4a3 14 | 12,12,1.1983858477444165,ceca20033d7d36b74dc683ddfb804aa7,09122ea36aaf2a8dff8f089286af7cf3,14fb049a96497a5deef345c1c38b2467 15 | 13,13,0.6163127216971285,cb4655bc2d2e54055efefb998883d6fe,09122ea36aaf2a8dff8f089286af7cf3,14fb049a96497a5deef345c1c38b2467 16 | 14,14,-1.000557072878672,cb4655bc2d2e54055efefb998883d6fe,ec5fb795fb7b3a111ad15e1506487535,795091554fd8f6b4a0ca7df81bf50a64 17 | 15,15,-0.37536741897602904,ceca20033d7d36b74dc683ddfb804aa7,e26d13daee6e371dead874b89752bbbe,5cc21cc265333250f10b13783ab06472 18 | 16,16,-0.5909500582528024,ceca20033d7d36b74dc683ddfb804aa7,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 19 | 17,17,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 20 | 18,18,-0.9143240171679625,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 21 | 19,19,-0.7634161696742211,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 22 | 
20,20,-0.6125083221804798,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 23 | 21,21,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,1d8ba92fbaa83078dfe330d66b81e5d6,5cc21cc265333250f10b13783ab06472 24 | 22,22,-0.6987413778911891,dbb8044a5cc8d79d0e5c3cf996e2d0b9,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 25 | 23,23,-0.5693917943251251,0450516d22e9e70b0ee136549576d0e7,937bfc1b19face0ab0a21dddaeaf19cd,14fb049a96497a5deef345c1c38b2467 26 | 24,24,0.4222883463480324,314759c31d4b75b54dfbbeb887f7bbe8,ef0257571cb05e9c0bba5446f9cfb0c9,14fb049a96497a5deef345c1c38b2467 27 | 25,25,-0.4616004746867384,cb4655bc2d2e54055efefb998883d6fe,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 28 | 26,26,0.8965701527569339,270de57201b8ec18df9a72ed7ecf20eb,ff86755a0252ce6d030f37e89025f60f,14fb049a96497a5deef345c1c38b2467 29 | 27,27,-0.8496492253849305,dbb8044a5cc8d79d0e5c3cf996e2d0b9,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 30 | 28,28,-1.0652318646617038,dbb8044a5cc8d79d0e5c3cf996e2d0b9,3f7cf3ddf1cc36d8310a8c0a48187aa9,5cc21cc265333250f10b13783ab06472 31 | 29,29,-0.8496492253849305,dbb8044a5cc8d79d0e5c3cf996e2d0b9,5adc59d478af904390b1de5af7f33d45,795091554fd8f6b4a0ca7df81bf50a64 32 | 30,30,-0.9143240171679625,dbb8044a5cc8d79d0e5c3cf996e2d0b9,ec5fb795fb7b3a111ad15e1506487535,795091554fd8f6b4a0ca7df81bf50a64 33 | 31,31,-0.4616004746867384,0450516d22e9e70b0ee136549576d0e7,5adc59d478af904390b1de5af7f33d45,795091554fd8f6b4a0ca7df81bf50a64 34 | 32,32,-0.5262752664697704,314759c31d4b75b54dfbbeb887f7bbe8,3f7cf3ddf1cc36d8310a8c0a48187aa9,5cc21cc265333250f10b13783ab06472 35 | 33,33,-0.6125083221804798,314759c31d4b75b54dfbbeb887f7bbe8,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 36 | -------------------------------------------------------------------------------- /obd/random/women/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.3701057045375884,01a0a328db2dd2a2e8d91bc43f204ba7,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 3 | 1,1,0.5251956676347125,dd868ca2c498f3384250f431e7767b34,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 4 | 2,2,-0.13450008028171972,252326b1475c78b26365ebc3430adca2,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 5 | 3,3,-0.5350296415166964,01a0a328db2dd2a2e8d91bc43f204ba7,d549c11ab8eb14045de2100d6ab90c86,6476528092c639c0ea8f74062f3dd1bb 6 | 4,4,-0.25230289240965403,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 7 | 5,5,0.03042385669738834,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 8 | 6,6,-0.13450008028171972,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 9 | 7,7,-0.8436730092918844,2f872b67f01f5f2f85b24eb87e99d52c,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 10 | 8,8,-0.8436730092918844,2f872b67f01f5f2f85b24eb87e99d52c,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 11 | 9,9,-0.6080673850360158,2f872b67f01f5f2f85b24eb87e99d52c,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 12 | 10,10,-0.6080673850360158,2f872b67f01f5f2f85b24eb87e99d52c,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 13 | 
11,11,0.36027173065560447,ef42bd4fa577ce60a5b82b6781a08c64,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 14 | 12,12,0.36027173065560447,ef42bd4fa577ce60a5b82b6781a08c64,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 15 | 13,13,2.9990547223213335,01a0a328db2dd2a2e8d91bc43f204ba7,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 16 | 14,14,1.3498153525302528,252326b1475c78b26365ebc3430adca2,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 17 | 15,15,-0.25230289240965403,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 18 | 16,16,0.6901196046138206,252326b1475c78b26365ebc3430adca2,0409e7011c80bccc0ff6442a03d05b29,c395d5f54cf50e223953258801be2697 19 | 17,17,2.339358974404901,252326b1475c78b26365ebc3430adca2,836017345da8a6725b8eed235c5ec3d0,75b8605bfcb7433d5bd178b3a0a2d38c 20 | 18,18,-0.39366626696317525,dd868ca2c498f3384250f431e7767b34,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 21 | 19,19,-0.8648775154749125,01a0a328db2dd2a2e8d91bc43f204ba7,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 22 | 20,20,0.8550435415929286,ef42bd4fa577ce60a5b82b6781a08c64,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 23 | 21,21,-0.6292718912190439,252326b1475c78b26365ebc3430adca2,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 24 | 22,22,0.8550435415929286,ef42bd4fa577ce60a5b82b6781a08c64,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 25 | 23,23,-0.2994240172608278,01a0a328db2dd2a2e8d91bc43f204ba7,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 26 | 24,24,-0.7235141409213913,dd868ca2c498f3384250f431e7767b34,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 27 | 25,25,-0.95911976517726,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 28 | 26,26,-0.6292718912190439,4c508776e494a9f4bc302b34fdc6e76e,683522ca22eaee449c5ac25c2a84ee52,465917095d1b8b7359e781ee782c2c26 29 | 27,27,-0.6292718912190439,4c508776e494a9f4bc302b34fdc6e76e,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 30 | 28,28,0.3367111682300176,de083a9403b58424cb3834909131a6de,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 31 | 29,29,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e98a5a6ce8eca89f5d9084dee8079f60,465917095d1b8b7359e781ee782c2c26 32 | 30,30,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 33 | 31,31,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e98a5a6ce8eca89f5d9084dee8079f60,465917095d1b8b7359e781ee782c2c26 34 | 32,32,0.17178723125090953,de083a9403b58424cb3834909131a6de,75fb3fbc11695c908a1397f96079949b,7ab06c804ac515866a347cb9a54bf2c8 35 | 33,33,-0.32298457968641464,de083a9403b58424cb3834909131a6de,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 36 | 34,34,0.5016351052091257,de083a9403b58424cb3834909131a6de,836017345da8a6725b8eed235c5ec3d0,75b8605bfcb7433d5bd178b3a0a2d38c 37 | 35,35,-0.5585902039422833,de083a9403b58424cb3834909131a6de,7e7fdf8c70a61405fea41ab1bf7cca25,6476528092c639c0ea8f74062f3dd1bb 38 | 36,36,-0.13450008028171972,dd868ca2c498f3384250f431e7767b34,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 39 | 37,37,-0.39366626696317525,5c1e1f8eb530ea4363c04483cd523ac4,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 40 | 
38,38,-0.3701057045375884,3f7aceec173a91029fead403c0fa4bc9,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 41 | 39,39,-0.46434795423993586,a37dab32ea544e235487fb30dc1b29f1,7e7fdf8c70a61405fea41ab1bf7cca25,6476528092c639c0ea8f74062f3dd1bb 42 | 40,40,4.15352228117509,ef42bd4fa577ce60a5b82b6781a08c64,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 43 | 41,41,0.03042385669738834,a37dab32ea544e235487fb30dc1b29f1,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 44 | 42,42,-0.4879085166655227,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 45 | 43,43,-0.95911976517726,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 46 | 44,44,-0.46434795423993586,4c508776e494a9f4bc302b34fdc6e76e,683522ca22eaee449c5ac25c2a84ee52,465917095d1b8b7359e781ee782c2c26 47 | 45,45,-0.7941958281981519,4c508776e494a9f4bc302b34fdc6e76e,14692cff9f8196fb8846653310d39719,465917095d1b8b7359e781ee782c2c26 48 | -------------------------------------------------------------------------------- /obp/__init__.py: -------------------------------------------------------------------------------- 1 | from obp import dataset 2 | from obp import ope 3 | from obp import policy 4 | from obp import simulator 5 | from obp import types 6 | from obp import utils 7 | from obp.version import __version__ # noqa 8 | 9 | 10 | __all__ = ["dataset", "ope", "policy", "simulator", "types", "utils", "version"] 11 | -------------------------------------------------------------------------------- /obp/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from obp.dataset.base import BaseBanditDataset 2 | from obp.dataset.base import BaseRealBanditDataset 3 | from obp.dataset.multiclass import MultiClassToBanditReduction 4 | from obp.dataset.real import OpenBanditDataset 5 | from obp.dataset.synthetic import linear_behavior_policy 6 | from obp.dataset.synthetic import linear_reward_function 7 | from obp.dataset.synthetic import logistic_polynomial_reward_function 8 | from obp.dataset.synthetic import logistic_reward_function 9 | from obp.dataset.synthetic import logistic_sparse_reward_function 10 | from obp.dataset.synthetic import polynomial_behavior_policy 11 | from obp.dataset.synthetic import polynomial_reward_function 12 | from obp.dataset.synthetic import sparse_reward_function 13 | from obp.dataset.synthetic import SyntheticBanditDataset 14 | from obp.dataset.synthetic_continuous import linear_behavior_policy_continuous 15 | from obp.dataset.synthetic_continuous import linear_reward_funcion_continuous 16 | from obp.dataset.synthetic_continuous import linear_synthetic_policy_continuous 17 | from obp.dataset.synthetic_continuous import quadratic_reward_funcion_continuous 18 | from obp.dataset.synthetic_continuous import sign_synthetic_policy_continuous 19 | from obp.dataset.synthetic_continuous import SyntheticContinuousBanditDataset 20 | from obp.dataset.synthetic_continuous import threshold_synthetic_policy_continuous 21 | from obp.dataset.synthetic_embed import SyntheticBanditDatasetWithActionEmbeds 22 | from obp.dataset.synthetic_multi import SyntheticMultiLoggersBanditDataset 23 | from obp.dataset.synthetic_slate import action_interaction_reward_function 24 | from obp.dataset.synthetic_slate import linear_behavior_policy_logit 25 | from obp.dataset.synthetic_slate import SyntheticSlateBanditDataset 26 | 27 | 28 | __all__ = [ 29 | 
"BaseBanditDataset", 30 | "BaseRealBanditDataset", 31 | "OpenBanditDataset", 32 | "SyntheticBanditDataset", 33 | "logistic_reward_function", 34 | "logistic_polynomial_reward_function", 35 | "logistic_sparse_reward_function", 36 | "linear_reward_function", 37 | "polynomial_reward_function", 38 | "sparse_reward_function", 39 | "linear_behavior_policy", 40 | "polynomial_behavior_policy", 41 | "MultiClassToBanditReduction", 42 | "SyntheticContinuousBanditDataset", 43 | "linear_reward_funcion_continuous", 44 | "quadratic_reward_funcion_continuous", 45 | "linear_behavior_policy_continuous", 46 | "linear_synthetic_policy_continuous", 47 | "threshold_synthetic_policy_continuous", 48 | "sign_synthetic_policy_continuous", 49 | "SyntheticSlateBanditDataset", 50 | "action_interaction_reward_function", 51 | "linear_behavior_policy_logit", 52 | "SyntheticBanditDatasetWithActionEmbeds", 53 | "SyntheticMultiLoggersBanditDataset", 54 | ] 55 | -------------------------------------------------------------------------------- /obp/dataset/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Yuta Saito, Yusuke Narita, and ZOZO Technologies, Inc. All rights reserved. 2 | # Licensed under the Apache 2.0 License. 3 | 4 | """Abstract Base Class for Logged Bandit Feedback.""" 5 | from abc import ABCMeta 6 | from abc import abstractmethod 7 | 8 | 9 | class BaseBanditDataset(metaclass=ABCMeta): 10 | """Base Class for Synthetic Bandit Dataset.""" 11 | 12 | @abstractmethod 13 | def obtain_batch_bandit_feedback(self) -> None: 14 | """Obtain batch logged bandit data.""" 15 | raise NotImplementedError 16 | 17 | 18 | class BaseRealBanditDataset(BaseBanditDataset): 19 | """Base Class for Real-World Bandit Dataset.""" 20 | 21 | @abstractmethod 22 | def load_raw_data(self) -> None: 23 | """Load raw dataset.""" 24 | raise NotImplementedError 25 | 26 | @abstractmethod 27 | def pre_process(self) -> None: 28 | """Preprocess raw dataset.""" 29 | raise NotImplementedError 30 | -------------------------------------------------------------------------------- /obp/dataset/obd/bts/men/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.6771831139635117,c82d13885d8bf7a3b8b9fa6f0842ba60,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 3 | 1,1,-0.7202996418188664,77490d05a721c6d93edf580642ffd8bd,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 4 | 2,2,0.7456623052631924,77490d05a721c6d93edf580642ffd8bd,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 5 | 3,3,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,ff2de7df709624e5b79199b850382ea0,68f8b5168b2a322db725a6cd6f5c900b 6 | 4,4,1.6511093902256406,61a525de9976c0f3fa29d400caf26c56,ee987234ffe4f3d901846ac3f7417738,7a0c97ee71eb7985bd0a6271ce57cec5 7 | 5,5,0.14203091528822703,61a525de9976c0f3fa29d400caf26c56,bb7caf7f0c11f7827fb23b331777b871,8ea65bc866b36a8f00ae913e0c3acc29 8 | 6,6,1.6511093902256406,c82d13885d8bf7a3b8b9fa6f0842ba60,818dfe387422471f09a34db693a78212,7a0c97ee71eb7985bd0a6271ce57cec5 9 | 7,7,2.8583721701755715,61a525de9976c0f3fa29d400caf26c56,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 10 | 8,8,1.349293695238158,61a525de9976c0f3fa29d400caf26c56,7daaf8717f83289266063b6cc1728087,7a0c97ee71eb7985bd0a6271ce57cec5 11 | 
9,9,1.1983858477444165,135f410ec21307919cd92df77f1e2a36,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 12 | 10,10,1.5864345984426087,135f410ec21307919cd92df77f1e2a36,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 13 | 11,11,0.44384661027570976,c82d13885d8bf7a3b8b9fa6f0842ba60,24ea3b3a472c51dd6299ebdfb220a55f,0c3b42b13b5a49fcb746da9f60e63717 14 | 12,12,1.1983858477444165,c82d13885d8bf7a3b8b9fa6f0842ba60,0e077f97ef2dcda0dc404f873fc5f96c,7a0c97ee71eb7985bd0a6271ce57cec5 15 | 13,13,0.6163127216971285,135f410ec21307919cd92df77f1e2a36,0e077f97ef2dcda0dc404f873fc5f96c,7a0c97ee71eb7985bd0a6271ce57cec5 16 | 14,14,-1.000557072878672,135f410ec21307919cd92df77f1e2a36,865945b5265169a2176a6e5f084ab2eb,8ea65bc866b36a8f00ae913e0c3acc29 17 | 15,15,-0.37536741897602904,c82d13885d8bf7a3b8b9fa6f0842ba60,786ff5d72b02d1e68a43508d9579977d,68f8b5168b2a322db725a6cd6f5c900b 18 | 16,16,-0.5909500582528024,c82d13885d8bf7a3b8b9fa6f0842ba60,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 19 | 17,17,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 20 | 18,18,-0.9143240171679625,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 21 | 19,19,-0.7634161696742211,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 22 | 20,20,-0.6125083221804798,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 23 | 21,21,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,ff2de7df709624e5b79199b850382ea0,68f8b5168b2a322db725a6cd6f5c900b 24 | 22,22,-0.6987413778911891,17ef71cb22e550d31e5eaa4d629c4abd,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 25 | 23,23,-0.5693917943251251,e1b1451d555c82a01874347dbecdfeae,01b306b40a448bff555c06d5d72c0171,7a0c97ee71eb7985bd0a6271ce57cec5 26 | 24,24,0.4222883463480324,f15de9aa508214df06454736b488717c,7daaf8717f83289266063b6cc1728087,7a0c97ee71eb7985bd0a6271ce57cec5 27 | 25,25,-0.4616004746867384,135f410ec21307919cd92df77f1e2a36,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 28 | 26,26,0.8965701527569339,77490d05a721c6d93edf580642ffd8bd,746facf4548f3da6d628b8e35bf9e6ec,7a0c97ee71eb7985bd0a6271ce57cec5 29 | 27,27,-0.8496492253849305,17ef71cb22e550d31e5eaa4d629c4abd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 30 | 28,28,-1.0652318646617038,17ef71cb22e550d31e5eaa4d629c4abd,a46137fea33ac48f0809591a76630ea5,68f8b5168b2a322db725a6cd6f5c900b 31 | 29,29,-0.8496492253849305,17ef71cb22e550d31e5eaa4d629c4abd,008dc8758000efaf5b318227fcb71f8d,8ea65bc866b36a8f00ae913e0c3acc29 32 | 30,30,-0.9143240171679625,17ef71cb22e550d31e5eaa4d629c4abd,865945b5265169a2176a6e5f084ab2eb,8ea65bc866b36a8f00ae913e0c3acc29 33 | 31,31,-0.4616004746867384,e1b1451d555c82a01874347dbecdfeae,008dc8758000efaf5b318227fcb71f8d,8ea65bc866b36a8f00ae913e0c3acc29 34 | 32,32,-0.5262752664697704,f15de9aa508214df06454736b488717c,a46137fea33ac48f0809591a76630ea5,68f8b5168b2a322db725a6cd6f5c900b 35 | 33,33,-0.6125083221804798,f15de9aa508214df06454736b488717c,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 36 | -------------------------------------------------------------------------------- /obp/dataset/obd/bts/women/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 
0,0,-0.3701057045375884,37784fea97b5827eeaf4a23dbff98b73,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 3 | 1,1,0.5251956676347125,3220392a73f0fb73e5509a3f6b89ae64,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 4 | 2,2,-0.13450008028171972,1f0bd59babc615f7876d70abd81b0703,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 5 | 3,3,-0.5350296415166964,37784fea97b5827eeaf4a23dbff98b73,30e4f82eec0c5210c403aab8007a5881,2951c610187f9e9e8281ecd31a156bd1 6 | 4,4,-0.25230289240965403,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 7 | 5,5,0.03042385669738834,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 8 | 6,6,-0.13450008028171972,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 9 | 7,7,-0.8436730092918844,54130721ea2331736ec3cd62c6ff2a0a,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 10 | 8,8,-0.8436730092918844,54130721ea2331736ec3cd62c6ff2a0a,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 11 | 9,9,-0.6080673850360158,54130721ea2331736ec3cd62c6ff2a0a,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 12 | 10,10,-0.6080673850360158,54130721ea2331736ec3cd62c6ff2a0a,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 13 | 11,11,0.36027173065560447,e88594e2095dc09c70763bd14b6bb16e,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 14 | 12,12,0.36027173065560447,e88594e2095dc09c70763bd14b6bb16e,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 15 | 13,13,2.9990547223213335,37784fea97b5827eeaf4a23dbff98b73,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 16 | 14,14,1.3498153525302528,1f0bd59babc615f7876d70abd81b0703,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 17 | 15,15,-0.25230289240965403,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 18 | 16,16,0.6901196046138206,1f0bd59babc615f7876d70abd81b0703,40b2c280a2676cf7e83a2c19a333d4a2,7ce347fef632da56f7d0cd2e3d96c9d2 19 | 17,17,2.339358974404901,1f0bd59babc615f7876d70abd81b0703,5e32ca87b332cb657386052c2962f06f,e6dceba864edcc7bf60d38616a52a13d 20 | 18,18,-0.39366626696317525,3220392a73f0fb73e5509a3f6b89ae64,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 21 | 19,19,-0.8648775154749125,37784fea97b5827eeaf4a23dbff98b73,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 22 | 20,20,0.8550435415929286,e88594e2095dc09c70763bd14b6bb16e,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 23 | 21,21,-0.6292718912190439,1f0bd59babc615f7876d70abd81b0703,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 24 | 22,22,0.8550435415929286,e88594e2095dc09c70763bd14b6bb16e,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 25 | 23,23,-0.2994240172608278,37784fea97b5827eeaf4a23dbff98b73,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 26 | 24,24,-0.7235141409213913,3220392a73f0fb73e5509a3f6b89ae64,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 27 | 25,25,-0.95911976517726,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 28 | 26,26,-0.6292718912190439,980e8ad619a60423e616b67cfb8e09b9,a164a8f4dbd09847e25a3956e12bccff,360f242a6660cf5ee5249dc3c197fe62 29 | 
27,27,-0.6292718912190439,980e8ad619a60423e616b67cfb8e09b9,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 30 | 28,28,0.3367111682300176,72f3f67e8e9907b474c547847f8d5fd3,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 31 | 29,29,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,a3438007435a63dbe0ea33f5a0d1e84a,360f242a6660cf5ee5249dc3c197fe62 32 | 30,30,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 33 | 31,31,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,a3438007435a63dbe0ea33f5a0d1e84a,360f242a6660cf5ee5249dc3c197fe62 34 | 32,32,0.17178723125090953,72f3f67e8e9907b474c547847f8d5fd3,04a71d6c9b0aa3b9e462a6923d1e8393,25e55d04edea9bd0a20aff26ac263414 35 | 33,33,-0.32298457968641464,72f3f67e8e9907b474c547847f8d5fd3,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 36 | 34,34,0.5016351052091257,72f3f67e8e9907b474c547847f8d5fd3,5e32ca87b332cb657386052c2962f06f,e6dceba864edcc7bf60d38616a52a13d 37 | 35,35,-0.5585902039422833,72f3f67e8e9907b474c547847f8d5fd3,b02dadb348cf4ac330bf1d90cb80237e,2951c610187f9e9e8281ecd31a156bd1 38 | 36,36,-0.13450008028171972,3220392a73f0fb73e5509a3f6b89ae64,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 39 | 37,37,-0.39366626696317525,f3a3cc32a3967214164eb2709555b3f7,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 40 | 38,38,-0.3701057045375884,cd7b41b498ea6d9180ad3fd389422c39,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 41 | 39,39,-0.46434795423993586,734fc1b871abffa4db3be9bc16ad80f7,b02dadb348cf4ac330bf1d90cb80237e,2951c610187f9e9e8281ecd31a156bd1 42 | 40,40,4.15352228117509,e88594e2095dc09c70763bd14b6bb16e,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 43 | 41,41,0.03042385669738834,734fc1b871abffa4db3be9bc16ad80f7,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 44 | 42,42,-0.4879085166655227,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 45 | 43,43,-0.95911976517726,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 46 | 44,44,-0.46434795423993586,980e8ad619a60423e616b67cfb8e09b9,a164a8f4dbd09847e25a3956e12bccff,360f242a6660cf5ee5249dc3c197fe62 47 | 45,45,-0.7941958281981519,980e8ad619a60423e616b67cfb8e09b9,1b433010466b794694fc6f5f29eac0d8,360f242a6660cf5ee5249dc3c197fe62 48 | -------------------------------------------------------------------------------- /obp/dataset/obd/random/men/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.6771831139635117,ceca20033d7d36b74dc683ddfb804aa7,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 3 | 1,1,-0.7202996418188664,270de57201b8ec18df9a72ed7ecf20eb,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 4 | 2,2,0.7456623052631924,270de57201b8ec18df9a72ed7ecf20eb,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 5 | 3,3,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,1d8ba92fbaa83078dfe330d66b81e5d6,5cc21cc265333250f10b13783ab06472 6 | 4,4,1.6511093902256406,ca9488139d82dbbf68a4e71fc7fe52f9,f65e8237cca7eb6b12f4f009a28a6f72,14fb049a96497a5deef345c1c38b2467 7 | 5,5,0.14203091528822703,ca9488139d82dbbf68a4e71fc7fe52f9,571216af60c365e6a05e1c33c7041f5f,795091554fd8f6b4a0ca7df81bf50a64 8 | 
6,6,1.6511093902256406,ceca20033d7d36b74dc683ddfb804aa7,d56aaef6375c7844851af69b354331ba,14fb049a96497a5deef345c1c38b2467 9 | 7,7,2.8583721701755715,ca9488139d82dbbf68a4e71fc7fe52f9,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 10 | 8,8,1.349293695238158,ca9488139d82dbbf68a4e71fc7fe52f9,ef0257571cb05e9c0bba5446f9cfb0c9,14fb049a96497a5deef345c1c38b2467 11 | 9,9,1.1983858477444165,cb4655bc2d2e54055efefb998883d6fe,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 12 | 10,10,1.5864345984426087,cb4655bc2d2e54055efefb998883d6fe,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 13 | 11,11,0.44384661027570976,ceca20033d7d36b74dc683ddfb804aa7,b1dbb432e49fb71cc3b3e820ff31f3ad,6893a4373a4e271e7f03b7a4bdfde4a3 14 | 12,12,1.1983858477444165,ceca20033d7d36b74dc683ddfb804aa7,09122ea36aaf2a8dff8f089286af7cf3,14fb049a96497a5deef345c1c38b2467 15 | 13,13,0.6163127216971285,cb4655bc2d2e54055efefb998883d6fe,09122ea36aaf2a8dff8f089286af7cf3,14fb049a96497a5deef345c1c38b2467 16 | 14,14,-1.000557072878672,cb4655bc2d2e54055efefb998883d6fe,ec5fb795fb7b3a111ad15e1506487535,795091554fd8f6b4a0ca7df81bf50a64 17 | 15,15,-0.37536741897602904,ceca20033d7d36b74dc683ddfb804aa7,e26d13daee6e371dead874b89752bbbe,5cc21cc265333250f10b13783ab06472 18 | 16,16,-0.5909500582528024,ceca20033d7d36b74dc683ddfb804aa7,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 19 | 17,17,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 20 | 18,18,-0.9143240171679625,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 21 | 19,19,-0.7634161696742211,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 22 | 20,20,-0.6125083221804798,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 23 | 21,21,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,1d8ba92fbaa83078dfe330d66b81e5d6,5cc21cc265333250f10b13783ab06472 24 | 22,22,-0.6987413778911891,dbb8044a5cc8d79d0e5c3cf996e2d0b9,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 25 | 23,23,-0.5693917943251251,0450516d22e9e70b0ee136549576d0e7,937bfc1b19face0ab0a21dddaeaf19cd,14fb049a96497a5deef345c1c38b2467 26 | 24,24,0.4222883463480324,314759c31d4b75b54dfbbeb887f7bbe8,ef0257571cb05e9c0bba5446f9cfb0c9,14fb049a96497a5deef345c1c38b2467 27 | 25,25,-0.4616004746867384,cb4655bc2d2e54055efefb998883d6fe,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 28 | 26,26,0.8965701527569339,270de57201b8ec18df9a72ed7ecf20eb,ff86755a0252ce6d030f37e89025f60f,14fb049a96497a5deef345c1c38b2467 29 | 27,27,-0.8496492253849305,dbb8044a5cc8d79d0e5c3cf996e2d0b9,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 30 | 28,28,-1.0652318646617038,dbb8044a5cc8d79d0e5c3cf996e2d0b9,3f7cf3ddf1cc36d8310a8c0a48187aa9,5cc21cc265333250f10b13783ab06472 31 | 29,29,-0.8496492253849305,dbb8044a5cc8d79d0e5c3cf996e2d0b9,5adc59d478af904390b1de5af7f33d45,795091554fd8f6b4a0ca7df81bf50a64 32 | 30,30,-0.9143240171679625,dbb8044a5cc8d79d0e5c3cf996e2d0b9,ec5fb795fb7b3a111ad15e1506487535,795091554fd8f6b4a0ca7df81bf50a64 33 | 31,31,-0.4616004746867384,0450516d22e9e70b0ee136549576d0e7,5adc59d478af904390b1de5af7f33d45,795091554fd8f6b4a0ca7df81bf50a64 34 | 32,32,-0.5262752664697704,314759c31d4b75b54dfbbeb887f7bbe8,3f7cf3ddf1cc36d8310a8c0a48187aa9,5cc21cc265333250f10b13783ab06472 35 | 
33,33,-0.6125083221804798,314759c31d4b75b54dfbbeb887f7bbe8,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 36 | -------------------------------------------------------------------------------- /obp/dataset/obd/random/women/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.3701057045375884,01a0a328db2dd2a2e8d91bc43f204ba7,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 3 | 1,1,0.5251956676347125,dd868ca2c498f3384250f431e7767b34,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 4 | 2,2,-0.13450008028171972,252326b1475c78b26365ebc3430adca2,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 5 | 3,3,-0.5350296415166964,01a0a328db2dd2a2e8d91bc43f204ba7,d549c11ab8eb14045de2100d6ab90c86,6476528092c639c0ea8f74062f3dd1bb 6 | 4,4,-0.25230289240965403,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 7 | 5,5,0.03042385669738834,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 8 | 6,6,-0.13450008028171972,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 9 | 7,7,-0.8436730092918844,2f872b67f01f5f2f85b24eb87e99d52c,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 10 | 8,8,-0.8436730092918844,2f872b67f01f5f2f85b24eb87e99d52c,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 11 | 9,9,-0.6080673850360158,2f872b67f01f5f2f85b24eb87e99d52c,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 12 | 10,10,-0.6080673850360158,2f872b67f01f5f2f85b24eb87e99d52c,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 13 | 11,11,0.36027173065560447,ef42bd4fa577ce60a5b82b6781a08c64,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 14 | 12,12,0.36027173065560447,ef42bd4fa577ce60a5b82b6781a08c64,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 15 | 13,13,2.9990547223213335,01a0a328db2dd2a2e8d91bc43f204ba7,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 16 | 14,14,1.3498153525302528,252326b1475c78b26365ebc3430adca2,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 17 | 15,15,-0.25230289240965403,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 18 | 16,16,0.6901196046138206,252326b1475c78b26365ebc3430adca2,0409e7011c80bccc0ff6442a03d05b29,c395d5f54cf50e223953258801be2697 19 | 17,17,2.339358974404901,252326b1475c78b26365ebc3430adca2,836017345da8a6725b8eed235c5ec3d0,75b8605bfcb7433d5bd178b3a0a2d38c 20 | 18,18,-0.39366626696317525,dd868ca2c498f3384250f431e7767b34,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 21 | 19,19,-0.8648775154749125,01a0a328db2dd2a2e8d91bc43f204ba7,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 22 | 20,20,0.8550435415929286,ef42bd4fa577ce60a5b82b6781a08c64,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 23 | 21,21,-0.6292718912190439,252326b1475c78b26365ebc3430adca2,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 24 | 22,22,0.8550435415929286,ef42bd4fa577ce60a5b82b6781a08c64,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 25 | 23,23,-0.2994240172608278,01a0a328db2dd2a2e8d91bc43f204ba7,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 26 | 
24,24,-0.7235141409213913,dd868ca2c498f3384250f431e7767b34,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 27 | 25,25,-0.95911976517726,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 28 | 26,26,-0.6292718912190439,4c508776e494a9f4bc302b34fdc6e76e,683522ca22eaee449c5ac25c2a84ee52,465917095d1b8b7359e781ee782c2c26 29 | 27,27,-0.6292718912190439,4c508776e494a9f4bc302b34fdc6e76e,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 30 | 28,28,0.3367111682300176,de083a9403b58424cb3834909131a6de,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 31 | 29,29,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e98a5a6ce8eca89f5d9084dee8079f60,465917095d1b8b7359e781ee782c2c26 32 | 30,30,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 33 | 31,31,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e98a5a6ce8eca89f5d9084dee8079f60,465917095d1b8b7359e781ee782c2c26 34 | 32,32,0.17178723125090953,de083a9403b58424cb3834909131a6de,75fb3fbc11695c908a1397f96079949b,7ab06c804ac515866a347cb9a54bf2c8 35 | 33,33,-0.32298457968641464,de083a9403b58424cb3834909131a6de,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 36 | 34,34,0.5016351052091257,de083a9403b58424cb3834909131a6de,836017345da8a6725b8eed235c5ec3d0,75b8605bfcb7433d5bd178b3a0a2d38c 37 | 35,35,-0.5585902039422833,de083a9403b58424cb3834909131a6de,7e7fdf8c70a61405fea41ab1bf7cca25,6476528092c639c0ea8f74062f3dd1bb 38 | 36,36,-0.13450008028171972,dd868ca2c498f3384250f431e7767b34,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 39 | 37,37,-0.39366626696317525,5c1e1f8eb530ea4363c04483cd523ac4,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 40 | 38,38,-0.3701057045375884,3f7aceec173a91029fead403c0fa4bc9,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 41 | 39,39,-0.46434795423993586,a37dab32ea544e235487fb30dc1b29f1,7e7fdf8c70a61405fea41ab1bf7cca25,6476528092c639c0ea8f74062f3dd1bb 42 | 40,40,4.15352228117509,ef42bd4fa577ce60a5b82b6781a08c64,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 43 | 41,41,0.03042385669738834,a37dab32ea544e235487fb30dc1b29f1,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 44 | 42,42,-0.4879085166655227,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 45 | 43,43,-0.95911976517726,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 46 | 44,44,-0.46434795423993586,4c508776e494a9f4bc302b34fdc6e76e,683522ca22eaee449c5ac25c2a84ee52,465917095d1b8b7359e781ee782c2c26 47 | 45,45,-0.7941958281981519,4c508776e494a9f4bc302b34fdc6e76e,14692cff9f8196fb8846653310d39719,465917095d1b8b7359e781ee782c2c26 48 | -------------------------------------------------------------------------------- /obp/dataset/reward_type.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class RewardType(enum.Enum): 5 | """Reward type. 6 | 7 | Attributes 8 | ---------- 9 | BINARY: 10 | The reward type is binary. 11 | CONTINUOUS: 12 | The reward type is continuous. 
13 | """ 14 | 15 | BINARY = "binary" 16 | CONTINUOUS = "continuous" 17 | 18 | def __repr__(self) -> str: 19 | 20 | return str(self) 21 | -------------------------------------------------------------------------------- /obp/ope/__init__.py: -------------------------------------------------------------------------------- 1 | from obp.ope.classification_model import ImportanceWeightEstimator 2 | from obp.ope.classification_model import PropensityScoreEstimator 3 | from obp.ope.estimators import BalancedInverseProbabilityWeighting 4 | from obp.ope.estimators import BaseOffPolicyEstimator 5 | from obp.ope.estimators import DirectMethod 6 | from obp.ope.estimators import DoublyRobust 7 | from obp.ope.estimators import DoublyRobustWithShrinkage 8 | from obp.ope.estimators import InverseProbabilityWeighting 9 | from obp.ope.estimators import ReplayMethod 10 | from obp.ope.estimators import SelfNormalizedDoublyRobust 11 | from obp.ope.estimators import SelfNormalizedInverseProbabilityWeighting 12 | from obp.ope.estimators import SubGaussianDoublyRobust 13 | from obp.ope.estimators import SubGaussianInverseProbabilityWeighting 14 | from obp.ope.estimators import SwitchDoublyRobust 15 | from obp.ope.estimators_continuous import ( 16 | KernelizedSelfNormalizedInverseProbabilityWeighting, 17 | ) 18 | from obp.ope.estimators_continuous import BaseContinuousOffPolicyEstimator 19 | from obp.ope.estimators_continuous import cosine_kernel 20 | from obp.ope.estimators_continuous import epanechnikov_kernel 21 | from obp.ope.estimators_continuous import gaussian_kernel 22 | from obp.ope.estimators_continuous import KernelizedDoublyRobust 23 | from obp.ope.estimators_continuous import KernelizedInverseProbabilityWeighting 24 | from obp.ope.estimators_continuous import triangular_kernel 25 | from obp.ope.estimators_embed import ( 26 | SelfNormalizedMarginalizedInverseProbabilityWeighting, 27 | ) 28 | from obp.ope.estimators_embed import MarginalizedInverseProbabilityWeighting 29 | from obp.ope.estimators_multi import BaseMultiLoggersOffPolicyEstimator 30 | from obp.ope.estimators_multi import MultiLoggersBalancedDoublyRobust 31 | from obp.ope.estimators_multi import MultiLoggersBalancedInverseProbabilityWeighting 32 | from obp.ope.estimators_multi import MultiLoggersNaiveDoublyRobust 33 | from obp.ope.estimators_multi import MultiLoggersNaiveInverseProbabilityWeighting 34 | from obp.ope.estimators_multi import MultiLoggersWeightedDoublyRobust 35 | from obp.ope.estimators_multi import MultiLoggersWeightedInverseProbabilityWeighting 36 | from obp.ope.estimators_slate import SelfNormalizedSlateIndependentIPS 37 | from obp.ope.estimators_slate import SelfNormalizedSlateRewardInteractionIPS 38 | from obp.ope.estimators_slate import SelfNormalizedSlateStandardIPS 39 | from obp.ope.estimators_slate import SlateCascadeDoublyRobust 40 | from obp.ope.estimators_slate import SlateIndependentIPS 41 | from obp.ope.estimators_slate import SlateRewardInteractionIPS 42 | from obp.ope.estimators_slate import SlateStandardIPS 43 | from obp.ope.estimators_tuning import DoublyRobustTuning 44 | from obp.ope.estimators_tuning import DoublyRobustWithShrinkageTuning 45 | from obp.ope.estimators_tuning import InverseProbabilityWeightingTuning 46 | from obp.ope.estimators_tuning import SubGaussianDoublyRobustTuning 47 | from obp.ope.estimators_tuning import SubGaussianInverseProbabilityWeightingTuning 48 | from obp.ope.estimators_tuning import SwitchDoublyRobustTuning 49 | from obp.ope.meta import OffPolicyEvaluation 50 | from 
obp.ope.meta_continuous import ContinuousOffPolicyEvaluation 51 | from obp.ope.meta_multi import MultiLoggersOffPolicyEvaluation 52 | from obp.ope.meta_slate import SlateOffPolicyEvaluation 53 | from obp.ope.regression_model import RegressionModel 54 | from obp.ope.regression_model_slate import SlateRegressionModel 55 | 56 | 57 | __all__ = [ 58 | "BaseOffPolicyEstimator", 59 | "ReplayMethod", 60 | "InverseProbabilityWeighting", 61 | "SelfNormalizedInverseProbabilityWeighting", 62 | "DirectMethod", 63 | "DoublyRobust", 64 | "SelfNormalizedDoublyRobust", 65 | "SwitchDoublyRobust", 66 | "DoublyRobustWithShrinkage", 67 | "SubGaussianInverseProbabilityWeighting", 68 | "SubGaussianDoublyRobust", 69 | "InverseProbabilityWeightingTuning", 70 | "DoublyRobustTuning", 71 | "SwitchDoublyRobustTuning", 72 | "DoublyRobustWithShrinkageTuning", 73 | "SubGaussianInverseProbabilityWeightingTuning", 74 | "SubGaussianDoublyRobustTuning", 75 | "MarginalizedInverseProbabilityWeighting", 76 | "SelfNormalizedMarginalizedInverseProbabilityWeighting", 77 | "BaseMultiLoggersOffPolicyEstimator", 78 | "MultiLoggersNaiveInverseProbabilityWeighting", 79 | "MultiLoggersWeightedInverseProbabilityWeighting", 80 | "MultiLoggersBalancedInverseProbabilityWeighting", 81 | "MultiLoggersNaiveDoublyRobust", 82 | "MultiLoggersBalancedDoublyRobust", 83 | "MultiLoggersWeightedDoublyRobust", 84 | "OffPolicyEvaluation", 85 | "SlateOffPolicyEvaluation", 86 | "ContinuousOffPolicyEvaluation", 87 | "MultiLoggersOffPolicyEvaluation", 88 | "RegressionModel", 89 | "SlateRegressionModel", 90 | "SlateStandardIPS", 91 | "SlateIndependentIPS", 92 | "SlateRewardInteractionIPS", 93 | "SlateCascadeDoublyRobust", 94 | "SelfNormalizedSlateRewardInteractionIPS", 95 | "SelfNormalizedSlateIndependentIPS", 96 | "SelfNormalizedSlateStandardIPS", 97 | "BalancedInverseProbabilityWeighting", 98 | "ImportanceWeightEstimator", 99 | "PropensityScoreEstimator", 100 | "BaseContinuousOffPolicyEstimator", 101 | "KernelizedInverseProbabilityWeighting", 102 | "KernelizedSelfNormalizedInverseProbabilityWeighting", 103 | "KernelizedDoublyRobust", 104 | "triangular_kernel", 105 | "gaussian_kernel", 106 | "epanechnikov_kernel", 107 | "cosine_kernel", 108 | ] 109 | 110 | __all_estimators__ = [ 111 | "ReplayMethod", 112 | "InverseProbabilityWeighting", 113 | "SelfNormalizedInverseProbabilityWeighting", 114 | "DirectMethod", 115 | "DoublyRobust", 116 | "DoublyRobustWithShrinkage", 117 | "SwitchDoublyRobust", 118 | "SelfNormalizedDoublyRobust", 119 | "SubGaussianInverseProbabilityWeighting", 120 | "SubGaussianDoublyRobust", 121 | "BalancedInverseProbabilityWeighting", 122 | ] 123 | 124 | 125 | __all_estimators_tuning__ = [ 126 | "InverseProbabilityWeightingTuning", 127 | "DoublyRobustTuning", 128 | "SwitchDoublyRobustTuning", 129 | "DoublyRobustWithShrinkageTuning", 130 | ] 131 | 132 | 133 | __all_estimators_tuning_sg__ = [ 134 | "SubGaussianInverseProbabilityWeightingTuning", 135 | "SubGaussianDoublyRobustTuning", 136 | ] 137 | -------------------------------------------------------------------------------- /obp/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from obp.policy.base import BaseContextFreePolicy 2 | from obp.policy.base import BaseContextualPolicy 3 | from obp.policy.base import BaseContinuousOfflinePolicyLearner 4 | from obp.policy.base import BaseOfflinePolicyLearner 5 | from obp.policy.contextfree import BernoulliTS 6 | from obp.policy.contextfree import EpsilonGreedy 7 | from obp.policy.contextfree 
import Random 8 | from obp.policy.linear import LinEpsilonGreedy 9 | from obp.policy.linear import LinTS 10 | from obp.policy.linear import LinUCB 11 | from obp.policy.logistic import LogisticEpsilonGreedy 12 | from obp.policy.logistic import LogisticTS 13 | from obp.policy.logistic import LogisticUCB 14 | from obp.policy.logistic import MiniBatchLogisticRegression 15 | from obp.policy.offline import IPWLearner 16 | from obp.policy.offline import NNPolicyLearner 17 | from obp.policy.offline import QLearner 18 | from obp.policy.offline_continuous import ContinuousNNPolicyLearner 19 | 20 | 21 | __all__ = [ 22 | "BaseContextFreePolicy", 23 | "BaseContextualPolicy", 24 | "BaseOfflinePolicyLearner", 25 | "BaseContinuousOfflinePolicyLearner", 26 | "EpsilonGreedy", 27 | "Random", 28 | "BernoulliTS", 29 | "LinEpsilonGreedy", 30 | "LinUCB", 31 | "LinTS", 32 | "LogisticEpsilonGreedy", 33 | "LogisticUCB", 34 | "LogisticTS", 35 | "MiniBatchLogisticRegression", 36 | "IPWLearner", 37 | "NNPolicyLearner", 38 | "QLearner", 39 | "ContinuousNNPolicyLearner", 40 | ] 41 | -------------------------------------------------------------------------------- /obp/policy/conf/prior_bts.yaml: -------------------------------------------------------------------------------- 1 | all: 2 | alpha: 3 | - 47.0 4 | - 8.0 5 | - 62.0 6 | - 142.0 7 | - 3.0 8 | - 14.0 9 | - 7.0 10 | - 857.0 11 | - 12.0 12 | - 15.0 13 | - 6.0 14 | - 100.0 15 | - 48.0 16 | - 23.0 17 | - 71.0 18 | - 61.0 19 | - 13.0 20 | - 16.0 21 | - 518.0 22 | - 30.0 23 | - 7.0 24 | - 4.0 25 | - 23.0 26 | - 8.0 27 | - 10.0 28 | - 11.0 29 | - 11.0 30 | - 18.0 31 | - 121.0 32 | - 11.0 33 | - 11.0 34 | - 10.0 35 | - 14.0 36 | - 9.0 37 | - 204.0 38 | - 58.0 39 | - 3.0 40 | - 19.0 41 | - 42.0 42 | - 1013.0 43 | - 2.0 44 | - 328.0 45 | - 15.0 46 | - 31.0 47 | - 14.0 48 | - 138.0 49 | - 45.0 50 | - 55.0 51 | - 23.0 52 | - 38.0 53 | - 10.0 54 | - 401.0 55 | - 52.0 56 | - 6.0 57 | - 3.0 58 | - 6.0 59 | - 5.0 60 | - 32.0 61 | - 35.0 62 | - 133.0 63 | - 52.0 64 | - 820.0 65 | - 43.0 66 | - 195.0 67 | - 8.0 68 | - 42.0 69 | - 40.0 70 | - 4.0 71 | - 32.0 72 | - 30.0 73 | - 9.0 74 | - 22.0 75 | - 6.0 76 | - 23.0 77 | - 5.0 78 | - 54.0 79 | - 8.0 80 | - 22.0 81 | - 65.0 82 | - 246.0 83 | beta: 84 | - 12198.0 85 | - 3566.0 86 | - 15993.0 87 | - 35522.0 88 | - 2367.0 89 | - 4609.0 90 | - 3171.0 91 | - 181745.0 92 | - 4372.0 93 | - 4951.0 94 | - 3100.0 95 | - 24665.0 96 | - 13210.0 97 | - 7061.0 98 | - 18061.0 99 | - 17449.0 100 | - 5644.0 101 | - 6787.0 102 | - 111326.0 103 | - 8776.0 104 | - 3334.0 105 | - 2271.0 106 | - 7389.0 107 | - 2659.0 108 | - 3665.0 109 | - 4724.0 110 | - 3561.0 111 | - 5085.0 112 | - 27407.0 113 | - 4601.0 114 | - 4756.0 115 | - 4120.0 116 | - 4736.0 117 | - 3788.0 118 | - 45292.0 119 | - 14719.0 120 | - 2189.0 121 | - 5589.0 122 | - 11995.0 123 | - 222255.0 124 | - 2308.0 125 | - 70034.0 126 | - 4801.0 127 | - 8274.0 128 | - 5421.0 129 | - 31912.0 130 | - 12213.0 131 | - 13576.0 132 | - 6230.0 133 | - 10382.0 134 | - 4141.0 135 | - 85731.0 136 | - 12811.0 137 | - 2707.0 138 | - 2250.0 139 | - 2668.0 140 | - 2886.0 141 | - 9581.0 142 | - 9465.0 143 | - 28336.0 144 | - 12062.0 145 | - 162793.0 146 | - 12107.0 147 | - 41240.0 148 | - 3162.0 149 | - 11604.0 150 | - 10818.0 151 | - 2923.0 152 | - 8897.0 153 | - 8654.0 154 | - 4000.0 155 | - 6580.0 156 | - 3174.0 157 | - 6766.0 158 | - 2602.0 159 | - 14506.0 160 | - 3968.0 161 | - 7523.0 162 | - 16532.0 163 | - 51964.0 164 | men: 165 | alpha: 166 | - 47.0 167 | - 8.0 168 | - 62.0 169 | - 142.0 170 | - 3.0 171 
| - 6.0 172 | - 100.0 173 | - 48.0 174 | - 23.0 175 | - 71.0 176 | - 61.0 177 | - 13.0 178 | - 16.0 179 | - 518.0 180 | - 30.0 181 | - 7.0 182 | - 4.0 183 | - 23.0 184 | - 8.0 185 | - 10.0 186 | - 11.0 187 | - 11.0 188 | - 18.0 189 | - 121.0 190 | - 11.0 191 | - 4.0 192 | - 32.0 193 | - 30.0 194 | - 9.0 195 | - 22.0 196 | - 6.0 197 | - 23.0 198 | - 5.0 199 | - 54.0 200 | beta: 201 | - 12198.0 202 | - 3566.0 203 | - 15993.0 204 | - 35522.0 205 | - 2367.0 206 | - 3100.0 207 | - 24665.0 208 | - 13210.0 209 | - 7061.0 210 | - 18061.0 211 | - 17449.0 212 | - 5644.0 213 | - 6787.0 214 | - 111326.0 215 | - 8776.0 216 | - 3334.0 217 | - 2271.0 218 | - 7389.0 219 | - 2659.0 220 | - 3665.0 221 | - 4724.0 222 | - 3561.0 223 | - 5085.0 224 | - 27407.0 225 | - 4601.0 226 | - 2923.0 227 | - 8897.0 228 | - 8654.0 229 | - 4000.0 230 | - 6580.0 231 | - 3174.0 232 | - 6766.0 233 | - 2602.0 234 | - 14506.0 235 | women: 236 | alpha: 237 | - 12.0 238 | - 7.0 239 | - 984.0 240 | - 13.0 241 | - 15.0 242 | - 15.0 243 | - 11.0 244 | - 14.0 245 | - 9.0 246 | - 200.0 247 | - 72.0 248 | - 3.0 249 | - 14.0 250 | - 49.0 251 | - 1278.0 252 | - 3.0 253 | - 325.0 254 | - 14.0 255 | - 27.0 256 | - 14.0 257 | - 169.0 258 | - 48.0 259 | - 47.0 260 | - 18.0 261 | - 40.0 262 | - 12.0 263 | - 447.0 264 | - 46.0 265 | - 5.0 266 | - 3.0 267 | - 5.0 268 | - 7.0 269 | - 35.0 270 | - 34.0 271 | - 99.0 272 | - 30.0 273 | - 880.0 274 | - 51.0 275 | - 182.0 276 | - 6.0 277 | - 45.0 278 | - 39.0 279 | - 10.0 280 | - 24.0 281 | - 72.0 282 | - 229.0 283 | beta: 284 | - 3612.0 285 | - 3173.0 286 | - 204484.0 287 | - 4517.0 288 | - 4765.0 289 | - 5331.0 290 | - 4131.0 291 | - 4728.0 292 | - 4028.0 293 | - 44280.0 294 | - 17918.0 295 | - 2309.0 296 | - 4339.0 297 | - 12922.0 298 | - 270771.0 299 | - 2480.0 300 | - 68475.0 301 | - 5129.0 302 | - 7367.0 303 | - 5819.0 304 | - 38026.0 305 | - 13047.0 306 | - 11604.0 307 | - 5394.0 308 | - 10912.0 309 | - 4439.0 310 | - 94485.0 311 | - 10700.0 312 | - 2679.0 313 | - 2319.0 314 | - 2578.0 315 | - 3288.0 316 | - 9566.0 317 | - 9775.0 318 | - 20120.0 319 | - 7317.0 320 | - 172026.0 321 | - 13673.0 322 | - 37329.0 323 | - 3365.0 324 | - 10911.0 325 | - 10734.0 326 | - 4278.0 327 | - 7574.0 328 | - 16826.0 329 | - 47462.0 330 | 331 | -------------------------------------------------------------------------------- /obp/policy/policy_type.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class PolicyType(enum.Enum): 5 | """Policy type. 6 | 7 | Attributes 8 | ---------- 9 | CONTEXT_FREE: 10 | The policy type is contextfree. 11 | CONTEXTUAL: 12 | The policy type is contextual. 13 | OFFLINE: 14 | The policy type is offline. 
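    A minimal sketch of how this enum is consumed in practice (the specific policy
    classes below are illustrative choices taken from the policies and tests
    elsewhere in this repository, not part of this enum's contract):

        from obp.policy import EpsilonGreedy, LogisticEpsilonGreedy
        from obp.policy.policy_type import PolicyType

        assert EpsilonGreedy(n_actions=2).policy_type == PolicyType.CONTEXT_FREE
        assert LogisticEpsilonGreedy(n_actions=2, dim=2).policy_type == PolicyType.CONTEXTUAL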
15 | """ 16 | 17 | CONTEXT_FREE = enum.auto() 18 | CONTEXTUAL = enum.auto() 19 | OFFLINE = enum.auto() 20 | 21 | def __repr__(self) -> str: 22 | 23 | return str(self) 24 | -------------------------------------------------------------------------------- /obp/simulator/__init__.py: -------------------------------------------------------------------------------- 1 | from obp.simulator.simulator import calc_ground_truth_policy_value 2 | 3 | 4 | __all__ = [ 5 | "calc_ground_truth_policy_value", 6 | ] 7 | -------------------------------------------------------------------------------- /obp/simulator/delay_sampler.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import numpy as np 4 | from sklearn.utils import check_random_state 5 | 6 | 7 | @dataclass 8 | class ExponentialDelaySampler: 9 | """Class for sampling delays from different exponential functions. 10 | 11 | Parameters 12 | ----------- 13 | max_scale: float, default=100.0 14 | The maximum scale parameter for the exponential delay distribution. When there is no weighted exponential 15 | function the max_scale becomes the default scale. 16 | 17 | min_scale: float, default=10.0 18 | The minimum scale parameter for the exponential delay distribution. Only used when sampling from a weighted 19 | exponential function. 20 | 21 | random_state: int, default=12345 22 | Controls the random seed in sampling synthetic bandit data. 23 | """ 24 | 25 | max_scale: float = 100.0 26 | min_scale: float = 10.0 27 | random_state: int = None 28 | 29 | def __post_init__(self) -> None: 30 | if self.random_state is None: 31 | raise ValueError("`random_state` must be given") 32 | self.random_ = check_random_state(self.random_state) 33 | 34 | def exponential_delay_function( 35 | self, n_rounds: int, n_actions: int, **kwargs 36 | ) -> np.ndarray: 37 | """Exponential delay function used for sampling a number of delay rounds before rewards can be observed. 38 | 39 | Note 40 | ------ 41 | This implementation of the exponential delay function assumes that there is no causal relationship between the 42 | context, action or reward and observed delay. Exponential delay function have been observed by Ktena, S.I. et al. 43 | 44 | Parameters 45 | ----------- 46 | n_rounds: int 47 | Number of rounds to sample delays for. 48 | 49 | n_actions: int 50 | Number of actions to sample delays for. If the exponential function is not parameterised the delays are 51 | repeated for each actions. 52 | 53 | Returns 54 | --------- 55 | delay_rounds: array-like, shape (n_rounds, ) 56 | Rounded up round delays representing the amount of rounds before the policy can observe the rewards. 57 | 58 | References 59 | ------------ 60 | Ktena, S.I., Tejani, A., Theis, L., Myana, P.K., Dilipkumar, D., Huszár, F., Yoo, S. and Shi, W. 61 | "Addressing delayed feedback for continuous training with neural networks in CTR prediction." 2019. 62 | 63 | """ 64 | delays_per_round = np.ceil( 65 | self.random_.exponential(scale=self.max_scale, size=n_rounds) 66 | ) 67 | 68 | return np.tile(delays_per_round, (n_actions, 1)).T 69 | 70 | def exponential_delay_function_expected_reward_weighted( 71 | self, expected_rewards: np.ndarray, **kwargs 72 | ) -> np.ndarray: 73 | """Exponential delay function used for sampling a number of delay rounds before rewards can be observed. 74 | Each delay is conditioned on the expected reward by multiplying (1 - expected_reward) * scale. 
This encodes 75 | the assumption that the more likely a reward is to be observed, the sooner it tends to 76 | arrive; e.g., recommending an attractive item will likely result in a faster purchase. 77 | 78 | Parameters 79 | ----------- 80 | expected_rewards : array-like, shape (n_rounds, n_actions) 81 | The expected reward, between 0 and 1, for each arm in each round. This is used to weight the scale of the 82 | exponential function. 83 | 84 | Returns 85 | --------- 86 | delay_rounds: array-like, shape (n_rounds, n_actions) 87 | Rounded-up delays representing the number of rounds before the policy can observe the rewards. 88 | """ 89 | scale = self.min_scale + ( 90 | (1 - expected_rewards) * (self.max_scale - self.min_scale) 91 | ) 92 | delays_per_round = np.ceil( 93 | self.random_.exponential(scale=scale, size=expected_rewards.shape) 94 | ) 95 | 96 | return delays_per_round 97 | -------------------------------------------------------------------------------- /obp/simulator/replay.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | 4 | from obp.policy.policy_type import PolicyType 5 | from obp.simulator.simulator import BanditPolicy 6 | from obp.types import BanditFeedback 7 | from obp.utils import check_bandit_feedback_inputs, convert_to_action_dist 8 | 9 | 10 | def run_bandit_replay( 11 | bandit_feedback: BanditFeedback, policy: BanditPolicy 12 | ) -> np.ndarray: 13 | """Run an online bandit algorithm on given logged bandit feedback data using the replay method. 14 | 15 | Parameters 16 | ---------- 17 | bandit_feedback: BanditFeedback 18 | Logged bandit data used in offline bandit simulation. 19 | policy: BanditPolicy 20 | Online bandit policy to be evaluated in offline bandit simulation (i.e., evaluation policy). 21 | Returns 22 | -------- 23 | action_dist: array-like, shape (n_rounds, n_actions, len_list) 24 | Action choice probabilities (can be deterministic). 25 | 26 | References 27 | ------------ 28 | Lihong Li, Wei Chu, John Langford, and Xuanhui Wang. 29 | "Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms.", 2011.
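    A minimal usage sketch (the dataset class, reward/behavior-policy helpers, and
    parameter values below are illustrative choices, not part of this function's
    contract):

        from obp.dataset import SyntheticBanditDataset, logistic_reward_function, linear_behavior_policy
        from obp.policy import EpsilonGreedy
        from obp.simulator.replay import run_bandit_replay

        dataset = SyntheticBanditDataset(
            n_actions=10,
            dim_context=5,
            reward_function=logistic_reward_function,
            behavior_policy_function=linear_behavior_policy,
            random_state=12345,
        )
        bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)
        policy = EpsilonGreedy(n_actions=10, epsilon=0.1)
        # action_dist has shape (n_rounds, n_actions, len_list)
        action_dist = run_bandit_replay(bandit_feedback=bandit_feedback, policy=policy)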
30 | """ 31 | for key_ in ["action", "position", "reward", "pscore", "context"]: 32 | if key_ not in bandit_feedback: 33 | raise RuntimeError(f"Missing key of {key_} in 'bandit_feedback'.") 34 | check_bandit_feedback_inputs( 35 | context=bandit_feedback["context"], 36 | action=bandit_feedback["action"], 37 | reward=bandit_feedback["reward"], 38 | position=bandit_feedback["position"], 39 | pscore=bandit_feedback["pscore"], 40 | ) 41 | 42 | policy_ = policy 43 | selected_actions_list = list() 44 | dim_context = bandit_feedback["context"].shape[1] 45 | if bandit_feedback["position"] is None: 46 | bandit_feedback["position"] = np.zeros_like( 47 | bandit_feedback["action"], dtype=int 48 | ) 49 | for action_, reward_, position_, context_ in tqdm( 50 | zip( 51 | bandit_feedback["action"], 52 | bandit_feedback["reward"], 53 | bandit_feedback["position"], 54 | bandit_feedback["context"], 55 | ), 56 | total=bandit_feedback["n_rounds"], 57 | ): 58 | 59 | # select a list of actions 60 | if policy_.policy_type == PolicyType.CONTEXT_FREE: 61 | selected_actions = policy_.select_action() 62 | elif policy_.policy_type == PolicyType.CONTEXTUAL: 63 | selected_actions = policy_.select_action(context_.reshape(1, dim_context)) 64 | action_match_ = action_ == selected_actions[position_] 65 | # update parameters of a bandit policy 66 | # only when selected actions&positions are equal to logged actions&positions 67 | if action_match_: 68 | if policy_.policy_type == PolicyType.CONTEXT_FREE: 69 | policy_.update_params(action=action_, reward=reward_) 70 | elif policy_.policy_type == PolicyType.CONTEXTUAL: 71 | policy_.update_params( 72 | action=action_, 73 | reward=reward_, 74 | context=context_.reshape(1, dim_context), 75 | ) 76 | selected_actions_list.append(selected_actions) 77 | 78 | action_dist = convert_to_action_dist( 79 | n_actions=bandit_feedback["action"].max() + 1, 80 | selected_actions=np.array(selected_actions_list), 81 | ) 82 | return action_dist 83 | -------------------------------------------------------------------------------- /obp/types.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Yuta Saito, Yusuke Narita, and ZOZO Technologies, Inc. All rights reserved. 2 | # Licensed under the Apache 2.0 License. 
3 | 4 | """Types.""" 5 | from typing import Dict 6 | from typing import Union 7 | 8 | import numpy as np 9 | 10 | 11 | # dataset 12 | BanditFeedback = Dict[str, Union[int, np.ndarray]] 13 | -------------------------------------------------------------------------------- /obp/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.5.5" 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "obp" 3 | version = "0.5.5" 4 | description = "Open Bandit Pipeline: a python library for off-policy evaluation and learning" 5 | authors = ["Yuta Saito "] 6 | license = "Apache License 2.0" 7 | 8 | [tool.poetry.dependencies] 9 | python = ">=3.7.1,<3.10" 10 | torch = "^1.9.0" 11 | scikit-learn = "1.0.2" 12 | pandas = "^1.3.2" 13 | numpy = "^1.21.2" 14 | matplotlib = "^3.4.3" 15 | tqdm = "^4.62.2" 16 | scipy = "1.7.3" 17 | PyYAML = "^5.4.1" 18 | seaborn = "^0.11.2" 19 | pyieoe = "^0.1.1" 20 | pingouin = "^0.4.0" 21 | mypy-extensions = "^0.4.3" 22 | Pillow = "9.1.1" 23 | 24 | [tool.poetry.dev-dependencies] 25 | flake8 = "^3.9.2" 26 | black = "22.1.0" 27 | pytest = "^6.2.5" 28 | isort = "^5.9.3" 29 | 30 | [build-system] 31 | requires = ["poetry-core>=1.0.0"] 32 | build-backend = "poetry.core.masonry.api" 33 | 34 | [tool.isort] 35 | profile = 'black' 36 | src_paths = ['obp', 'tests', 'examples', 'benchmark'] 37 | line_length = 88 38 | lines_after_imports = 2 39 | force_single_line = 'True' 40 | force_sort_within_sections = 'True' 41 | order_by_type = 'False' 42 | 43 | [tool.pytest.ini_options] 44 | addopts = "--color=yes" 45 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | E501,W503,W605,E203 4 | # We ignore E501: line too long because we assume 5 | # the checking of code length is already done by black. 
6 | # We ignore W503: line break before binary operator because it is incompatible with black 7 | # We ignore W605: invalid escape sequence because it is needed to write math equations 8 | # We ignore E203: whitespace before ':' 9 | exclude = .venv,build 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from obp.version import __version__ 2 | from setuptools import setup, find_packages 3 | from os import path 4 | import sys 5 | 6 | here = path.abspath(path.dirname(__file__)) 7 | sys.path.insert(0, path.join(here, "obp")) 8 | 9 | print("version") 10 | print(__version__) 11 | 12 | with open(path.join(here, "README.md"), encoding="utf-8") as f: 13 | long_description = f.read() 14 | 15 | package_data_list = ["obp/policy/conf/prior_bts.yaml", "obp/dataset/obd"] 16 | 17 | setup( 18 | name="obp", 19 | version=__version__, 20 | description="Open Bandit Pipeline: a python library for bandit algorithms and off-policy evaluation", 21 | url="https://github.com/st-tech/zr-obp", 22 | author="Yuta Saito", 23 | author_email="open-bandit-project@googlegroups.com", 24 | keywords=["bandit algorithms", "off-policy evaluation"], 25 | long_description=long_description, 26 | long_description_content_type="text/markdown", 27 | install_requires=[ 28 | "matplotlib>=3.4.3", 29 | "mypy-extensions>=0.4.3", 30 | "numpy>=1.21.2", 31 | "pandas>=1.3.2", 32 | "pyyaml>=5.1", 33 | "seaborn>=0.10.1", 34 | "scikit-learn>=1.0.2", 35 | "scipy>=1.7.3", 36 | "torch>=1.9.0", 37 | "tqdm>=4.62.2", 38 | "pyieoe>=0.1.1", 39 | "pingouin>=0.4.0", 40 | ], 41 | license="Apache License", 42 | packages=find_packages( 43 | exclude=["benchmark", "docs", "examples", "obd", "tests", "slides"] 44 | ), 45 | package_data={"obp": package_data_list}, 46 | include_package_data=True, 47 | classifiers=[ 48 | "Intended Audience :: Science/Research", 49 | "Programming Language :: Python :: 3.7", 50 | "Programming Language :: Python :: 3.8", 51 | "Programming Language :: Python :: 3.9", 52 | "Topic :: Scientific/Engineering", 53 | "Topic :: Scientific/Engineering :: Mathematics", 54 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 55 | "Topic :: Software Development", 56 | "Topic :: Software Development :: Libraries", 57 | "Topic :: Software Development :: Libraries :: Python Modules", 58 | "License :: OSI Approved :: Apache Software License", 59 | ], 60 | ) 61 | -------------------------------------------------------------------------------- /slides/slides_EN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/slides/slides_EN.pdf -------------------------------------------------------------------------------- /slides/slides_JN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/slides/slides_JN.pdf -------------------------------------------------------------------------------- /tests/dataset/test_multiclass.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import pytest 5 | from sklearn.datasets import load_digits 6 | from sklearn.linear_model import LogisticRegression 7 | 8 | from obp.dataset import MultiClassToBanditReduction 9 | 10 | 11 | 
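# A minimal sketch of the end-to-end workflow these tests exercise (parameter values
# are illustrative; the individual tests below pin down the exact behavior):
#
#     X, y = load_digits(return_X_y=True)
#     mcbr = MultiClassToBanditReduction(
#         X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=0.3
#     )
#     mcbr.split_train_eval(eval_size=1000)
#     bandit_feedback = mcbr.obtain_batch_bandit_feedback()
#     action_dist = mcbr.obtain_action_dist_by_eval_policy()
#     ground_truth = mcbr.calc_ground_truth_policy_value(action_dist=action_dist)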
@pytest.fixture(scope="session") 12 | def raw_data() -> Tuple[np.ndarray, np.ndarray]: 13 | X, y = load_digits(return_X_y=True) 14 | return X, y 15 | 16 | 17 | def test_invalid_initialization(raw_data): 18 | X, y = raw_data 19 | 20 | # invalid alpha_b 21 | with pytest.raises(ValueError): 22 | MultiClassToBanditReduction( 23 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=-0.3 24 | ) 25 | 26 | with pytest.raises(ValueError): 27 | MultiClassToBanditReduction( 28 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=1.3 29 | ) 30 | 31 | # invalid classifier 32 | with pytest.raises(ValueError): 33 | from sklearn.tree import DecisionTreeRegressor 34 | 35 | MultiClassToBanditReduction(X=X, y=y, base_classifier_b=DecisionTreeRegressor) 36 | 37 | # invalid n_def_actions 38 | with pytest.raises(TypeError): 39 | MultiClassToBanditReduction( 40 | X=X, 41 | y=y, 42 | base_classifier_b=LogisticRegression(max_iter=10000), 43 | n_deficient_actions="aaa", 44 | ) 45 | 46 | with pytest.raises(TypeError): 47 | MultiClassToBanditReduction( 48 | X=X, 49 | y=y, 50 | base_classifier_b=LogisticRegression(max_iter=10000), 51 | n_deficient_actions=None, 52 | ) 53 | 54 | with pytest.raises(ValueError): 55 | MultiClassToBanditReduction( 56 | X=X, 57 | y=y, 58 | base_classifier_b=LogisticRegression(max_iter=10000), 59 | n_deficient_actions=-1, 60 | ) 61 | 62 | with pytest.raises(ValueError): 63 | MultiClassToBanditReduction( 64 | X=X, 65 | y=y, 66 | base_classifier_b=LogisticRegression(max_iter=10000), 67 | n_deficient_actions=1000, 68 | ) 69 | 70 | 71 | def test_split_train_eval(raw_data): 72 | X, y = raw_data 73 | 74 | eval_size = 1000 75 | mcbr = MultiClassToBanditReduction( 76 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=0.3 77 | ) 78 | mcbr.split_train_eval(eval_size=eval_size) 79 | 80 | assert eval_size == mcbr.n_rounds_ev 81 | 82 | 83 | def test_obtain_batch_bandit_feedback(raw_data): 84 | X, y = raw_data 85 | 86 | for n_deficient_actions in [0, 2]: 87 | mcbr = MultiClassToBanditReduction( 88 | X=X, 89 | y=y, 90 | base_classifier_b=LogisticRegression(max_iter=10000), 91 | alpha_b=0.3, 92 | n_deficient_actions=n_deficient_actions, 93 | ) 94 | mcbr.split_train_eval() 95 | bandit_feedback = mcbr.obtain_batch_bandit_feedback() 96 | 97 | assert "n_actions" in bandit_feedback.keys() 98 | assert "n_rounds" in bandit_feedback.keys() 99 | assert "context" in bandit_feedback.keys() 100 | assert "action" in bandit_feedback.keys() 101 | assert "reward" in bandit_feedback.keys() 102 | assert "position" in bandit_feedback.keys() 103 | assert "pi_b" in bandit_feedback.keys() 104 | assert "pscore" in bandit_feedback.keys() 105 | 106 | n_rounds = bandit_feedback["n_rounds"] 107 | pi_b = bandit_feedback["pi_b"] 108 | assert pi_b.shape[0] == n_rounds 109 | n_actions = np.unique(y).shape[0] 110 | assert pi_b.shape[1] == n_actions 111 | assert pi_b.shape[2] == 1 112 | assert np.allclose(pi_b[:, :, 0].sum(1), np.ones(n_rounds)) 113 | assert (pi_b == 0).sum() == n_deficient_actions * n_rounds 114 | 115 | 116 | def test_obtain_action_dist_by_eval_policy(raw_data): 117 | X, y = raw_data 118 | 119 | eval_size = 1000 120 | mcbr = MultiClassToBanditReduction( 121 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=0.3 122 | ) 123 | mcbr.split_train_eval(eval_size=eval_size) 124 | 125 | # invalid alpha_e 126 | with pytest.raises(ValueError): 127 | mcbr.obtain_action_dist_by_eval_policy(alpha_e=-0.3) 128 | 129 | with pytest.raises(ValueError): 
130 | mcbr.obtain_action_dist_by_eval_policy(alpha_e=1.3) 131 | 132 | # valid type 133 | action_dist = mcbr.obtain_action_dist_by_eval_policy() 134 | 135 | assert action_dist.shape[0] == eval_size 136 | n_actions = np.unique(y).shape[0] 137 | assert action_dist.shape[1] == n_actions 138 | assert action_dist.shape[2] == 1 139 | assert np.allclose(action_dist[:, :, 0].sum(1), np.ones(eval_size)) 140 | 141 | 142 | def test_calc_ground_truth_policy_value(raw_data): 143 | X, y = raw_data 144 | 145 | eval_size = 1000 146 | mcbr = MultiClassToBanditReduction( 147 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=0.3 148 | ) 149 | mcbr.split_train_eval(eval_size=eval_size) 150 | 151 | with pytest.raises(ValueError): 152 | invalid_action_dist = np.zeros(eval_size) 153 | mcbr.calc_ground_truth_policy_value(action_dist=invalid_action_dist) 154 | 155 | with pytest.raises(ValueError): 156 | reshaped_action_dist = mcbr.obtain_action_dist_by_eval_policy().reshape( 157 | 1, -1, 1 158 | ) 159 | mcbr.calc_ground_truth_policy_value(action_dist=reshaped_action_dist) 160 | 161 | action_dist = mcbr.obtain_action_dist_by_eval_policy() 162 | ground_truth_policy_value = mcbr.calc_ground_truth_policy_value( 163 | action_dist=action_dist 164 | ) 165 | assert isinstance(ground_truth_policy_value, float) 166 | -------------------------------------------------------------------------------- /tests/dataset/test_real.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import pytest 7 | 8 | from obp.dataset import OpenBanditDataset 9 | 10 | 11 | def test_real_init(): 12 | # behavior_policy 13 | with pytest.raises(ValueError): 14 | OpenBanditDataset(behavior_policy="aaa", campaign="all") 15 | 16 | # campaign 17 | with pytest.raises(ValueError): 18 | OpenBanditDataset(behavior_policy="random", campaign="aaa") 19 | 20 | # data_path 21 | with pytest.raises(ValueError): 22 | OpenBanditDataset(behavior_policy="random", campaign="all", data_path=5) 23 | 24 | # load_raw_data 25 | obd = OpenBanditDataset(behavior_policy="random", campaign="all") 26 | # check the value exists and has the right type 27 | assert ( 28 | isinstance(obd.data, pd.DataFrame) 29 | and isinstance(obd.item_context, pd.DataFrame) 30 | and isinstance(obd.action, np.ndarray) 31 | and isinstance(obd.position, np.ndarray) 32 | and isinstance(obd.reward, np.ndarray) 33 | and isinstance(obd.pscore, np.ndarray) 34 | ) 35 | 36 | # pre_process (context and action_context) 37 | assert isinstance(obd.context, np.ndarray) and isinstance( 38 | obd.action_context, np.ndarray 39 | ) 40 | 41 | 42 | def test_obtain_batch_bandit_feedback(): 43 | # invalid test_size 44 | with pytest.raises(ValueError): 45 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 46 | dataset.obtain_batch_bandit_feedback(is_timeseries_split=True, test_size=1.3) 47 | 48 | with pytest.raises(ValueError): 49 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 50 | dataset.obtain_batch_bandit_feedback(is_timeseries_split=True, test_size=-0.5) 51 | 52 | with pytest.raises(TypeError): 53 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 54 | dataset.obtain_batch_bandit_feedback(is_timeseries_split=True, test_size="0.5") 55 | 56 | with pytest.raises(TypeError): 57 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 58 | 
dataset.obtain_batch_bandit_feedback(is_timeseries_split="True", test_size=0.5) 59 | 60 | # existence of keys 61 | # is_timeseries_split=False (default) 62 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 63 | bandit_feedback = dataset.obtain_batch_bandit_feedback() 64 | 65 | assert "n_rounds" in bandit_feedback.keys() 66 | assert "n_actions" in bandit_feedback.keys() 67 | assert "action" in bandit_feedback.keys() 68 | assert "position" in bandit_feedback.keys() 69 | assert "reward" in bandit_feedback.keys() 70 | assert "pscore" in bandit_feedback.keys() 71 | assert "context" in bandit_feedback.keys() 72 | assert "action_context" in bandit_feedback.keys() 73 | 74 | # is_timeseries_split=True 75 | bandit_feedback_timeseries = dataset.obtain_batch_bandit_feedback( 76 | is_timeseries_split=True 77 | ) 78 | assert isinstance(bandit_feedback_timeseries, Tuple) 79 | bandit_feedback_train = bandit_feedback_timeseries[0] 80 | bandit_feedback_test = bandit_feedback_timeseries[1] 81 | 82 | bf_elems = { 83 | "n_rounds", 84 | "n_actions", 85 | "action", 86 | "position", 87 | "reward", 88 | "pscore", 89 | "context", 90 | "action_context", 91 | } 92 | assert all(k in bandit_feedback_train.keys() for k in bf_elems) 93 | assert all(k in bandit_feedback_test.keys() for k in bf_elems) 94 | 95 | 96 | def test_calc_on_policy_policy_value_estimate(): 97 | ground_truth_policy_value = OpenBanditDataset.calc_on_policy_policy_value_estimate( 98 | behavior_policy="random", campaign="all" 99 | ) 100 | assert isinstance(ground_truth_policy_value, float) 101 | 102 | 103 | def test_sample_bootstrap_bandit_feedback(): 104 | with pytest.raises(ValueError): 105 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 106 | dataset.sample_bootstrap_bandit_feedback( 107 | is_timeseries_split=True, test_size=1.3 108 | ) 109 | 110 | with pytest.raises(ValueError): 111 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 112 | dataset.sample_bootstrap_bandit_feedback( 113 | is_timeseries_split=True, test_size=-0.5 114 | ) 115 | 116 | with pytest.raises(ValueError): 117 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 118 | dataset.sample_bootstrap_bandit_feedback(sample_size=-50) 119 | 120 | with pytest.raises(TypeError): 121 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 122 | dataset.sample_bootstrap_bandit_feedback(sample_size=50.0) 123 | 124 | with pytest.raises(ValueError): 125 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 126 | dataset.sample_bootstrap_bandit_feedback(sample_size=10000000) 127 | 128 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 129 | bandit_feedback = dataset.obtain_batch_bandit_feedback() 130 | bootstrap_bf = dataset.sample_bootstrap_bandit_feedback() 131 | 132 | bf_keys = {"action", "position", "reward", "pscore", "context"} 133 | for k in bf_keys: 134 | assert len(bandit_feedback[k]) == len(bootstrap_bf[k]) 135 | 136 | bandit_feedback_timeseries: Dict = dataset.obtain_batch_bandit_feedback( 137 | is_timeseries_split=True 138 | )[0] 139 | bootstrap_bf_timeseries = dataset.sample_bootstrap_bandit_feedback( 140 | is_timeseries_split=True 141 | ) 142 | for k in bf_keys: 143 | assert len(bandit_feedback_timeseries[k]) == len(bootstrap_bf_timeseries[k]) 144 | 145 | sample_size = 1000 146 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 147 | bootstrap_bf = dataset.sample_bootstrap_bandit_feedback(sample_size=sample_size) 148 | 
assert bootstrap_bf["n_rounds"] == sample_size 149 | for k in bf_keys: 150 | assert len(bootstrap_bf[k]) == sample_size 151 | -------------------------------------------------------------------------------- /tests/ope/conftest.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | from typing import Set 4 | 5 | import numpy as np 6 | import pytest 7 | from scipy import special 8 | from sklearn.utils import check_random_state 9 | 10 | from obp.dataset import linear_behavior_policy 11 | from obp.dataset import logistic_reward_function 12 | from obp.dataset import SyntheticBanditDataset 13 | from obp.dataset import SyntheticBanditDatasetWithActionEmbeds 14 | from obp.dataset import SyntheticContinuousBanditDataset 15 | from obp.dataset import SyntheticMultiLoggersBanditDataset 16 | from obp.dataset import SyntheticSlateBanditDataset 17 | from obp.policy import Random 18 | from obp.types import BanditFeedback 19 | from obp.utils import sigmoid 20 | 21 | 22 | # resolve ImportMismatchError when using virtual environment 23 | os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1" 24 | 25 | 26 | # generate synthetic bandit dataset using SyntheticBanditDataset 27 | @pytest.fixture(scope="session") 28 | def synthetic_bandit_feedback() -> BanditFeedback: 29 | n_actions = 10 30 | dim_context = 5 31 | random_state = 12345 32 | n_rounds = 10000 33 | dataset = SyntheticBanditDataset( 34 | n_actions=n_actions, 35 | dim_context=dim_context, 36 | reward_function=logistic_reward_function, 37 | behavior_policy_function=linear_behavior_policy, 38 | random_state=random_state, 39 | ) 40 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 41 | return bandit_feedback 42 | 43 | 44 | # generate synthetic slate bandit dataset using SyntheticSlateBanditDataset 45 | @pytest.fixture(scope="session") 46 | def synthetic_slate_bandit_feedback() -> BanditFeedback: 47 | # set parameters 48 | n_unique_action = 10 49 | len_list = 3 50 | dim_context = 2 51 | reward_type = "binary" 52 | random_state = 12345 53 | n_rounds = 100 54 | dataset = SyntheticSlateBanditDataset( 55 | n_unique_action=n_unique_action, 56 | len_list=len_list, 57 | dim_context=dim_context, 58 | reward_type=reward_type, 59 | random_state=random_state, 60 | ) 61 | # obtain feedback 62 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 63 | return bandit_feedback 64 | 65 | 66 | # generate synthetic continuous bandit dataset using SyntheticContinuousBanditDataset 67 | @pytest.fixture(scope="session") 68 | def synthetic_continuous_bandit_feedback() -> BanditFeedback: 69 | # set parameters 70 | dim_context = 2 71 | random_state = 12345 72 | n_rounds = 100 73 | min_action_value = -10 74 | max_action_value = 10 75 | dataset = SyntheticContinuousBanditDataset( 76 | dim_context=dim_context, 77 | min_action_value=min_action_value, 78 | max_action_value=max_action_value, 79 | random_state=random_state, 80 | ) 81 | # obtain feedback 82 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 83 | return bandit_feedback 84 | 85 | 86 | @pytest.fixture(scope="session") 87 | def synthetic_multi_bandit_feedback() -> BanditFeedback: 88 | n_actions = 10 89 | dim_context = 5 90 | betas = [-10, -5, 0, 5, 10] 91 | rhos = [1, 2, 3, 2, 1] 92 | random_state = 12345 93 | n_rounds = 10000 94 | dataset = SyntheticMultiLoggersBanditDataset( 95 | n_actions=n_actions, 96 | dim_context=dim_context, 97 | betas=betas, 98 | rhos=rhos, 99 | 
reward_function=logistic_reward_function, 100 | random_state=random_state, 101 | ) 102 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 103 | return bandit_feedback 104 | 105 | 106 | @pytest.fixture(scope="session") 107 | def synthetic_bandit_feedback_with_embed() -> BanditFeedback: 108 | n_actions = 10 109 | dim_context = 5 110 | n_cat_dim = 3 111 | n_cat_per_dim = 5 112 | random_state = 12345 113 | n_rounds = 10000 114 | dataset = SyntheticBanditDatasetWithActionEmbeds( 115 | n_actions=n_actions, 116 | dim_context=dim_context, 117 | n_cat_dim=n_cat_dim, 118 | n_cat_per_dim=n_cat_per_dim, 119 | reward_function=logistic_reward_function, 120 | random_state=random_state, 121 | ) 122 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 123 | return bandit_feedback 124 | 125 | 126 | # make the expected reward of synthetic bandit feedback close to that of the Open Bandit Dataset 127 | @pytest.fixture(scope="session") 128 | def fixed_synthetic_bandit_feedback(synthetic_bandit_feedback) -> BanditFeedback: 129 | # set random 130 | random_state = 12345 131 | random_ = check_random_state(random_state) 132 | # copy synthetic bandit feedback 133 | bandit_feedback = copy.deepcopy(synthetic_bandit_feedback) 134 | # expected reward would be about 0.65%, which is close to that of the ZOZO dataset 135 | logit = special.logit(bandit_feedback["expected_reward"]) 136 | bandit_feedback["expected_reward"] = sigmoid(logit - 4.0) 137 | expected_reward_factual = bandit_feedback["expected_reward"][ 138 | np.arange(bandit_feedback["n_rounds"]), bandit_feedback["action"] 139 | ] 140 | bandit_feedback["reward"] = random_.binomial(n=1, p=expected_reward_factual) 141 | return bandit_feedback 142 | 143 | 144 | # key set of bandit feedback data 145 | @pytest.fixture(scope="session") 146 | def feedback_key_set() -> Set[str]: 147 | return { 148 | "action", 149 | "action_context", 150 | "context", 151 | "expected_reward", 152 | "n_actions", 153 | "n_rounds", 154 | "position", 155 | "pi_b", 156 | "pscore", 157 | "reward", 158 | } 159 | 160 | 161 | # random evaluation policy 162 | @pytest.fixture(scope="session") 163 | def random_action_dist(synthetic_bandit_feedback) -> np.ndarray: 164 | n_actions = synthetic_bandit_feedback["n_actions"] 165 | evaluation_policy = Random(n_actions=n_actions, len_list=1) 166 | action_dist = evaluation_policy.compute_batch_action_dist( 167 | n_rounds=synthetic_bandit_feedback["n_rounds"] 168 | ) 169 | return action_dist 170 | 171 | 172 | def generate_action_dist(i, j, k): 173 | x = np.random.uniform(size=(i, j, k)) 174 | action_dist = x / x.sum(axis=1)[:, np.newaxis, :] 175 | return action_dist 176 | -------------------------------------------------------------------------------- /tests/ope/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 100 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 1000 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 100 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | ridge: 17 | alpha: 0.2 18 | random_state: 12345 19 | -------------------------------------------------------------------------------- /tests/ope/hyperparams_slate.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 100 3 | learning_rate: 0.01 4 | max_depth: 5 5 | 
min_samples_leaf: 10 6 | random_state: 12345 7 | random_forest: 8 | n_estimators: 100 9 | max_depth: 5 10 | min_samples_leaf: 10 11 | random_state: 12345 12 | ridge: 13 | alpha: 0.2 14 | random_state: 12345 15 | -------------------------------------------------------------------------------- /tests/ope/test_dm_estimators.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from conftest import generate_action_dist 4 | import numpy as np 5 | import pytest 6 | 7 | from obp.ope import DirectMethod 8 | from obp.types import BanditFeedback 9 | 10 | 11 | # action_dist, position, estimated_rewards_by_reg_model, description 12 | invalid_input_of_dm = [ 13 | ( 14 | generate_action_dist(5, 4, 3), 15 | np.zeros(5, dtype=int), 16 | np.zeros((5, 4, 2)), # 17 | "Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`, but found it False", 18 | ), 19 | ( 20 | generate_action_dist(5, 4, 3), 21 | np.zeros(5, dtype=int), 22 | None, # 23 | "`estimated_rewards_by_reg_model` must be 3D array", 24 | ), 25 | ( 26 | generate_action_dist(5, 4, 3), 27 | np.zeros(5, dtype=int), 28 | "4", # 29 | "`estimated_rewards_by_reg_model` must be 3D array", 30 | ), 31 | ] 32 | 33 | 34 | @pytest.mark.parametrize( 35 | "action_dist, position, estimated_rewards_by_reg_model, description", 36 | invalid_input_of_dm, 37 | ) 38 | def test_dm_using_invalid_input_data( 39 | action_dist: np.ndarray, 40 | position: np.ndarray, 41 | estimated_rewards_by_reg_model: np.ndarray, 42 | description: str, 43 | ) -> None: 44 | dm = DirectMethod() 45 | with pytest.raises(ValueError, match=f"{description}*"): 46 | _ = dm.estimate_policy_value( 47 | action_dist=action_dist, 48 | position=position, 49 | estimated_rewards_by_reg_model=estimated_rewards_by_reg_model, 50 | ) 51 | with pytest.raises(ValueError, match=f"{description}*"): 52 | _ = dm.estimate_interval( 53 | action_dist=action_dist, 54 | position=position, 55 | estimated_rewards_by_reg_model=estimated_rewards_by_reg_model, 56 | ) 57 | 58 | 59 | def test_dm_using_random_evaluation_policy( 60 | synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray 61 | ) -> None: 62 | """ 63 | Test the performance of the direct method using synthetic bandit data and random evaluation policy 64 | """ 65 | expected_reward = synthetic_bandit_feedback["expected_reward"][:, :, np.newaxis] 66 | action_dist = random_action_dist 67 | # compute ground truth policy value using expected reward 68 | q_pi_e = np.average(expected_reward[:, :, 0], weights=action_dist[:, :, 0], axis=1) 69 | # compute statistics of ground truth policy value 70 | gt_mean = q_pi_e.mean() 71 | # prepare dm 72 | dm = DirectMethod() 73 | # prepare input dict 74 | input_dict = { 75 | k: v 76 | for k, v in synthetic_bandit_feedback.items() 77 | if k in ["reward", "action", "pscore", "position"] 78 | } 79 | input_dict["action_dist"] = action_dist 80 | # estimated_rewards_by_reg_model is required 81 | with pytest.raises( 82 | TypeError, 83 | match=re.escape( 84 | "estimate_policy_value() missing 1 required positional argument: 'estimated_rewards_by_reg_model'" 85 | ), 86 | ): 87 | _ = dm.estimate_policy_value(**input_dict) 88 | # add estimated_rewards_by_reg_model 89 | input_dict["estimated_rewards_by_reg_model"] = expected_reward 90 | # check expectation 91 | estimated_policy_value = dm.estimate_policy_value(**input_dict) 92 | assert ( 93 | gt_mean == estimated_policy_value 94 | ), "DM should be perfect when the regression model is perfect" 95 | # remove 
unnecessary keys 96 | del input_dict["reward"] 97 | del input_dict["pscore"] 98 | del input_dict["action"] 99 | estimated_policy_value = dm.estimate_policy_value(**input_dict) 100 | assert ( 101 | gt_mean == estimated_policy_value 102 | ), "DM should be perfect when the regression model is perfect" 103 | -------------------------------------------------------------------------------- /tests/ope/test_kernel_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import integrate 3 | 4 | from obp.ope import cosine_kernel 5 | from obp.ope import epanechnikov_kernel 6 | from obp.ope import gaussian_kernel 7 | from obp.ope import triangular_kernel 8 | 9 | 10 | def test_kernel_functions(): 11 | # triangular 12 | assert np.isclose( 13 | integrate.quad(lambda x: triangular_kernel(x), -np.inf, np.inf)[0], 1 14 | ) 15 | assert np.isclose( 16 | integrate.quad(lambda x: x * triangular_kernel(x), -np.inf, np.inf)[0], 0 17 | ) 18 | assert integrate.quad(lambda x: triangular_kernel(x) ** 2, -np.inf, np.inf)[0] > 0 19 | 20 | # epanechnikov 21 | assert np.isclose( 22 | integrate.quad(lambda x: epanechnikov_kernel(x), -np.inf, np.inf)[0], 1 23 | ) 24 | assert np.isclose( 25 | integrate.quad(lambda x: x * epanechnikov_kernel(x), -np.inf, np.inf)[0], 0 26 | ) 27 | assert integrate.quad(lambda x: epanechnikov_kernel(x) ** 2, -np.inf, np.inf)[0] > 0 28 | 29 | # gaussian 30 | assert np.isclose( 31 | integrate.quad(lambda x: gaussian_kernel(x), -np.inf, np.inf)[0], 1 32 | ) 33 | assert np.isclose( 34 | integrate.quad(lambda x: x * gaussian_kernel(x), -np.inf, np.inf)[0], 0 35 | ) 36 | assert integrate.quad(lambda x: gaussian_kernel(x) ** 2, -np.inf, np.inf)[0] > 0 37 | 38 | # cosine 39 | assert np.isclose(integrate.quad(lambda x: cosine_kernel(x), -np.inf, np.inf)[0], 1) 40 | assert np.isclose( 41 | integrate.quad(lambda x: x * cosine_kernel(x), -np.inf, np.inf)[0], 0 42 | ) 43 | assert integrate.quad(lambda x: cosine_kernel(x) ** 2, -np.inf, np.inf)[0] > 0 44 | -------------------------------------------------------------------------------- /tests/policy/test_contextfree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from obp.policy.contextfree import BernoulliTS 5 | from obp.policy.contextfree import EpsilonGreedy 6 | from obp.policy.contextfree import Random 7 | from obp.policy.policy_type import PolicyType 8 | 9 | 10 | def test_contextfree_base_exception(): 11 | # invalid n_actions 12 | with pytest.raises(ValueError): 13 | EpsilonGreedy(n_actions=0) 14 | 15 | with pytest.raises(TypeError): 16 | EpsilonGreedy(n_actions="3") 17 | 18 | # invalid len_list 19 | with pytest.raises(ValueError): 20 | EpsilonGreedy(n_actions=2, len_list=-1) 21 | 22 | with pytest.raises(TypeError): 23 | EpsilonGreedy(n_actions=2, len_list="5") 24 | 25 | # invalid batch_size 26 | with pytest.raises(ValueError): 27 | EpsilonGreedy(n_actions=2, batch_size=-3) 28 | 29 | with pytest.raises(TypeError): 30 | EpsilonGreedy(n_actions=2, batch_size="3") 31 | 32 | # invalid relationship between n_actions and len_list 33 | with pytest.raises(ValueError): 34 | EpsilonGreedy(n_actions=5, len_list=10) 35 | 36 | with pytest.raises(ValueError): 37 | EpsilonGreedy(n_actions=2, len_list=3) 38 | 39 | 40 | def test_egreedy_normal_epsilon(): 41 | 42 | policy1 = EpsilonGreedy(n_actions=2) 43 | assert 0 <= policy1.epsilon <= 1 44 | 45 | policy2 = EpsilonGreedy(n_actions=3, epsilon=0.3) 46 | assert 
0 <= policy2.epsilon <= 1 47 | 48 | # policy type 49 | assert EpsilonGreedy(n_actions=2).policy_type == PolicyType.CONTEXT_FREE 50 | 51 | 52 | def test_egreedy_abnormal_epsilon(): 53 | 54 | with pytest.raises(ValueError): 55 | EpsilonGreedy(n_actions=2, epsilon=1.2) 56 | 57 | with pytest.raises(ValueError): 58 | EpsilonGreedy(n_actions=5, epsilon=-0.2) 59 | 60 | 61 | def test_egreedy_select_action_exploitation(): 62 | trial_num = 50 63 | policy = EpsilonGreedy(n_actions=2, epsilon=0.0) 64 | policy.action_counts = np.array([3, 3]) 65 | policy.reward_counts = np.array([3, 0]) 66 | for _ in range(trial_num): 67 | assert policy.select_action()[0] == 0 68 | 69 | 70 | def test_egreedy_select_action_exploration(): 71 | trial_num = 50 72 | policy = EpsilonGreedy(n_actions=2, epsilon=1.0) 73 | policy.action_counts = np.array([3, 3]) 74 | policy.reward_counts = np.array([3, 0]) 75 | selected_action = [policy.select_action() for _ in range(trial_num)] 76 | assert 0 < sum(selected_action)[0] < trial_num 77 | 78 | 79 | def test_egreedy_update_params(): 80 | policy = EpsilonGreedy(n_actions=2, epsilon=1.0) 81 | policy.action_counts_temp = np.array([4, 3]) 82 | policy.action_counts = np.copy(policy.action_counts_temp) 83 | policy.reward_counts_temp = np.array([2.0, 0.0]) 84 | policy.reward_counts = np.copy(policy.reward_counts_temp) 85 | action = 0 86 | reward = 1.0 87 | policy.update_params(action, reward) 88 | assert np.array_equal(policy.action_counts, np.array([5, 3])) 89 | assert np.allclose(policy.reward_counts, np.array([2.0 + reward, 0.0])) 90 | 91 | 92 | def test_random_compute_batch_action_dist(): 93 | n_actions = 10 94 | len_list = 5 95 | n_rounds = 100 96 | policy = Random(n_actions=n_actions, len_list=len_list) 97 | action_dist = policy.compute_batch_action_dist(n_rounds=n_rounds) 98 | assert action_dist.shape[0] == n_rounds 99 | assert action_dist.shape[1] == n_actions 100 | assert action_dist.shape[2] == len_list 101 | assert len(np.unique(action_dist)) == 1 102 | assert np.unique(action_dist)[0] == 1 / n_actions 103 | 104 | 105 | def test_bernoulli_ts_zozotown_prior(): 106 | 107 | with pytest.raises(Exception): 108 | BernoulliTS(n_actions=2, is_zozotown_prior=True) 109 | 110 | policy_all = BernoulliTS(n_actions=2, is_zozotown_prior=True, campaign="all") 111 | # check whether it is not a non-informative prior parameter (i.e., default parameter) 112 | assert len(np.unique(policy_all.alpha)) != 1 113 | assert len(np.unique(policy_all.beta)) != 1 114 | 115 | policy_men = BernoulliTS(n_actions=2, is_zozotown_prior=True, campaign="men") 116 | assert len(np.unique(policy_men.alpha)) != 1 117 | assert len(np.unique(policy_men.beta)) != 1 118 | 119 | policy_women = BernoulliTS(n_actions=2, is_zozotown_prior=True, campaign="women") 120 | assert len(np.unique(policy_women.alpha)) != 1 121 | assert len(np.unique(policy_women.beta)) != 1 122 | 123 | 124 | def test_bernoulli_ts_select_action(): 125 | # invalid relationship between n_actions and len_list 126 | with pytest.raises(ValueError): 127 | BernoulliTS(n_actions=5, len_list=10) 128 | 129 | with pytest.raises(ValueError): 130 | BernoulliTS(n_actions=2, len_list=3) 131 | 132 | policy1 = BernoulliTS(n_actions=3, len_list=3) 133 | assert np.allclose(np.sort(policy1.select_action()), np.array([0, 1, 2])) 134 | 135 | policy = BernoulliTS(n_actions=5, len_list=3) 136 | assert len(policy.select_action()) == 3 137 | 138 | 139 | def test_bernoulli_ts_update_params(): 140 | policy = BernoulliTS(n_actions=2) 141 | policy.action_counts_temp = np.array([4,
3]) 142 | policy.action_counts = np.copy(policy.action_counts_temp) 143 | policy.reward_counts_temp = np.array([2.0, 0.0]) 144 | policy.reward_counts = np.copy(policy.reward_counts_temp) 145 | action = 0 146 | reward = 1.0 147 | policy.update_params(action, reward) 148 | assert np.array_equal(policy.action_counts, np.array([5, 3])) 149 | # in bernoulli ts, reward_counts is defined as the sum of observed rewards for each action 150 | next_reward = 2.0 + reward 151 | assert np.allclose(policy.reward_counts, np.array([next_reward, 0.0])) 152 | 153 | 154 | def test_bernoulli_ts_compute_batch_action_dist(): 155 | n_rounds = 10 156 | n_actions = 5 157 | len_list = 2 158 | policy = BernoulliTS(n_actions=n_actions, len_list=len_list) 159 | action_dist = policy.compute_batch_action_dist(n_rounds=n_rounds, n_sim=30) 160 | assert action_dist.shape[0] == n_rounds 161 | assert action_dist.shape[1] == n_actions 162 | assert action_dist.shape[2] == len_list 163 | -------------------------------------------------------------------------------- /tests/policy/test_logistic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from obp.policy.logistic import LogisticEpsilonGreedy 5 | from obp.policy.logistic import LogisticTS 6 | from obp.policy.logistic import LogisticUCB 7 | from obp.policy.logistic import MiniBatchLogisticRegression 8 | 9 | 10 | def test_logistic_base_exception(): 11 | # invalid dim 12 | with pytest.raises(ValueError): 13 | LogisticEpsilonGreedy(n_actions=2, dim=-3) 14 | 15 | with pytest.raises(ValueError): 16 | LogisticEpsilonGreedy(n_actions=2, dim=0) 17 | 18 | with pytest.raises(TypeError): 19 | LogisticEpsilonGreedy(n_actions=2, dim="3") 20 | 21 | # invalid n_actions 22 | with pytest.raises(ValueError): 23 | LogisticEpsilonGreedy(n_actions=-3, dim=2) 24 | 25 | with pytest.raises(ValueError): 26 | LogisticEpsilonGreedy(n_actions=1, dim=2) 27 | 28 | with pytest.raises(TypeError): 29 | LogisticEpsilonGreedy(n_actions="2", dim=2) 30 | 31 | # invalid len_list 32 | with pytest.raises(ValueError): 33 | LogisticEpsilonGreedy(n_actions=2, dim=2, len_list=-3) 34 | 35 | with pytest.raises(ValueError): 36 | LogisticEpsilonGreedy(n_actions=2, dim=2, len_list=0) 37 | 38 | with pytest.raises(TypeError): 39 | LogisticEpsilonGreedy(n_actions=2, dim=2, len_list="3") 40 | 41 | # invalid batch_size 42 | with pytest.raises(ValueError): 43 | LogisticEpsilonGreedy(n_actions=2, dim=2, batch_size=-2) 44 | 45 | with pytest.raises(ValueError): 46 | LogisticEpsilonGreedy(n_actions=2, dim=2, batch_size=0) 47 | 48 | with pytest.raises(TypeError): 49 | LogisticEpsilonGreedy(n_actions=2, dim=2, batch_size="10") 50 | 51 | # invalid relationship between n_actions and len_list 52 | with pytest.raises(ValueError): 53 | LogisticEpsilonGreedy(n_actions=5, len_list=10, dim=2) 54 | 55 | with pytest.raises(ValueError): 56 | LogisticEpsilonGreedy(n_actions=2, len_list=3, dim=2) 57 | 58 | 59 | def test_logistic_epsilon_normal_epsilon(): 60 | 61 | policy1 = LogisticEpsilonGreedy(n_actions=2, dim=2) 62 | assert 0 <= policy1.epsilon <= 1 63 | 64 | policy2 = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=0.5) 65 | assert policy2.epsilon == 0.5 66 | 67 | 68 | def test_logistic_epsilon_abnormal_epsilon(): 69 | 70 | with pytest.raises(ValueError): 71 | LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=1.3) 72 | 73 | with pytest.raises(ValueError): 74 | LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=-0.3) 75 | 76 | 77 | def 
test_logistic_epsilon_each_action_model(): 78 | n_actions = 3 79 | policy = LogisticEpsilonGreedy(n_actions=n_actions, dim=2, epsilon=0.5) 80 | for i in range(n_actions): 81 | assert isinstance(policy.model_list[i], MiniBatchLogisticRegression) 82 | 83 | 84 | def test_logistic_epsilon_select_action_exploitation(): 85 | trial_num = 50 86 | policy = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=0.0) 87 | context = np.array([1.0, 1.0]).reshape(1, -1) 88 | policy.update_params(action=0, reward=1.0, context=context) 89 | policy.update_params(action=0, reward=1.0, context=context) 90 | policy.update_params(action=1, reward=1.0, context=context) 91 | policy.update_params(action=1, reward=0.0, context=context) 92 | for _ in range(trial_num): 93 | assert policy.select_action(context=context)[0] == 0 94 | 95 | 96 | def test_logistic_epsilon_select_action_exploration(): 97 | trial_num = 50 98 | policy = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=1.0) 99 | context = np.array([1.0, 1.0]).reshape(1, -1) 100 | policy.update_params(action=0, reward=1.0, context=context) 101 | policy.update_params(action=0, reward=1.0, context=context) 102 | policy.update_params(action=1, reward=1.0, context=context) 103 | policy.update_params(action=1, reward=0.0, context=context) 104 | selected_action = [policy.select_action(context=context) for _ in range(trial_num)] 105 | assert 0 < sum(selected_action)[0] < trial_num 106 | 107 | 108 | def test_logistic_ucb_initialize(): 109 | # note that the meaning of epsilon is different from that of LogisticEpsilonGreedy 110 | with pytest.raises(ValueError): 111 | LogisticUCB(n_actions=2, dim=2, epsilon=-0.2) 112 | 113 | n_actions = 3 114 | policy = LogisticUCB(n_actions=n_actions, dim=2, epsilon=0.5) 115 | for i in range(n_actions): 116 | assert isinstance(policy.model_list[i], MiniBatchLogisticRegression) 117 | 118 | 119 | def test_logistic_ucb_select_action(): 120 | dim = 3 121 | len_list = 2 122 | policy = LogisticUCB(n_actions=4, dim=dim, len_list=2, epsilon=0.0) 123 | context = np.ones(dim).reshape(1, -1) 124 | action = policy.select_action(context=context) 125 | assert len(action) == len_list 126 | 127 | 128 | def test_logistic_ts_initialize(): 129 | n_actions = 3 130 | policy = LogisticTS(n_actions=n_actions, dim=2) 131 | for i in range(n_actions): 132 | assert isinstance(policy.model_list[i], MiniBatchLogisticRegression) 133 | 134 | 135 | def test_logistic_ts_select_action(): 136 | dim = 3 137 | len_list = 2 138 | policy = LogisticTS(n_actions=4, dim=dim, len_list=2) 139 | context = np.ones(dim).reshape(1, -1) 140 | action = policy.select_action(context=context) 141 | assert len(action) == len_list 142 | -------------------------------------------------------------------------------- /tests/policy/test_offline_learner_continuous_performance.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | from typing import Tuple 4 | from typing import Union 5 | 6 | from joblib import delayed 7 | from joblib import Parallel 8 | import numpy as np 9 | import pytest 10 | 11 | from obp.dataset import linear_behavior_policy_continuous 12 | from obp.dataset import linear_reward_funcion_continuous 13 | from obp.dataset import SyntheticContinuousBanditDataset 14 | from obp.policy import BaseContinuousOfflinePolicyLearner 15 | from obp.policy import ContinuousNNPolicyLearner 16 | 17 | 18 | # n_rounds, dim_context, action_noise, reward_noise, min_action_value, max_action_value, 
pg_method, bandwidth 19 | offline_experiment_configurations = [ 20 | ( 21 | 1500, 22 | 10, 23 | 1.0, 24 | 1.0, 25 | -10.0, 26 | 10.0, 27 | "dpg", 28 | None, 29 | ), 30 | ( 31 | 2000, 32 | 5, 33 | 1.0, 34 | 1.0, 35 | 0.0, 36 | 100.0, 37 | "dpg", 38 | None, 39 | ), 40 | ] 41 | 42 | 43 | @dataclass 44 | class RandomPolicy(BaseContinuousOfflinePolicyLearner): 45 | output_space: Tuple[Union[int, float], Union[int, float]] = None 46 | 47 | def fit(self): 48 | raise NotImplementedError 49 | 50 | def predict(self, context: np.ndarray) -> np.ndarray: 51 | 52 | n_rounds = context.shape[0] 53 | predicted_actions = np.random.uniform( 54 | self.output_space[0], self.output_space[1], size=n_rounds 55 | ) 56 | return predicted_actions 57 | 58 | 59 | @pytest.mark.parametrize( 60 | "n_rounds, dim_context, action_noise, reward_noise, min_action_value, max_action_value, pg_method, bandwidth", 61 | offline_experiment_configurations, 62 | ) 63 | def test_offline_nn_policy_learner_performance( 64 | n_rounds: int, 65 | dim_context: int, 66 | action_noise: float, 67 | reward_noise: float, 68 | min_action_value: float, 69 | max_action_value: float, 70 | pg_method: str, 71 | bandwidth: Optional[float], 72 | ) -> None: 73 | def process(i: int): 74 | # synthetic data generator 75 | dataset = SyntheticContinuousBanditDataset( 76 | dim_context=dim_context, 77 | action_noise=action_noise, 78 | reward_noise=reward_noise, 79 | min_action_value=min_action_value, 80 | max_action_value=max_action_value, 81 | reward_function=linear_reward_funcion_continuous, 82 | behavior_policy_function=linear_behavior_policy_continuous, 83 | random_state=i, 84 | ) 85 | # define evaluation policy using NNPolicyLearner 86 | nn_policy = ContinuousNNPolicyLearner( 87 | dim_context=dim_context, 88 | pg_method=pg_method, 89 | bandwidth=bandwidth, 90 | output_space=(min_action_value, max_action_value), 91 | hidden_layer_size=(10, 10), 92 | learning_rate_init=0.001, 93 | max_iter=200, 94 | solver="sgd", 95 | q_func_estimator_hyperparams={"max_iter": 200}, 96 | ) 97 | # baseline method 1. 
RandomPolicy 98 | random_policy = RandomPolicy(output_space=(min_action_value, max_action_value)) 99 | # sample new training and test sets of synthetic logged bandit data 100 | bandit_feedback_train = dataset.obtain_batch_bandit_feedback( 101 | n_rounds=n_rounds, 102 | ) 103 | bandit_feedback_test = dataset.obtain_batch_bandit_feedback( 104 | n_rounds=n_rounds, 105 | ) 106 | # train the evaluation policy on the training set of the synthetic logged bandit data 107 | nn_policy.fit( 108 | context=bandit_feedback_train["context"], 109 | action=bandit_feedback_train["action"], 110 | reward=bandit_feedback_train["reward"], 111 | pscore=bandit_feedback_train["pscore"], 112 | ) 113 | # predict the action decisions for the test set of the synthetic logged bandit data 114 | actions_predicted_by_nn_policy = nn_policy.predict( 115 | context=bandit_feedback_test["context"], 116 | ) 117 | actions_predicted_by_random = random_policy.predict( 118 | context=bandit_feedback_test["context"], 119 | ) 120 | # get the ground truth policy value for each learner 121 | gt_nn_policy_learner = dataset.calc_ground_truth_policy_value( 122 | context=bandit_feedback_test["context"], 123 | action=actions_predicted_by_nn_policy, 124 | ) 125 | gt_random_policy = dataset.calc_ground_truth_policy_value( 126 | context=bandit_feedback_test["context"], 127 | action=actions_predicted_by_random, 128 | ) 129 | 130 | return gt_nn_policy_learner, gt_random_policy 131 | 132 | n_runs = 10 133 | processed = Parallel( 134 | n_jobs=1, # PyTorch uses multiple threads 135 | verbose=0, 136 | )([delayed(process)(i) for i in np.arange(n_runs)]) 137 | list_gt_nn_policy, list_gt_random = [], [] 138 | for i, ground_truth_policy_values in enumerate(processed): 139 | gt_nn_policy, gt_random = ground_truth_policy_values 140 | list_gt_nn_policy.append(gt_nn_policy) 141 | list_gt_random.append(gt_random) 142 | 143 | assert np.mean(list_gt_nn_policy) > np.mean(list_gt_random) 144 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from obp.utils import sample_action_fast 4 | from obp.utils import softmax 5 | 6 | 7 | def test_sample_action_fast(): 8 | n_rounds = 10 9 | n_actions = 5 10 | n_sim = 100000 11 | 12 | true_probs = softmax(np.random.normal(size=(n_rounds, n_actions))) 13 | sampled_action_list = list() 14 | for _ in np.arange(n_sim): 15 | sampled_action_list.append(sample_action_fast(true_probs)[:, np.newaxis]) 16 | 17 | sampled_action_arr = np.concatenate(sampled_action_list, 1) 18 | for i in np.arange(n_rounds): 19 | sampled_action_counts = np.unique(sampled_action_arr[i], return_counts=True)[1] 20 | empirical_probs = sampled_action_counts / n_sim 21 | assert np.isclose(true_probs[i], empirical_probs, rtol=5e-2, atol=1e-3).all() 22 | --------------------------------------------------------------------------------
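The utilities exercised by the final test above can be sketched as follows (a minimal, self-contained example; the array sizes are illustrative only):

    import numpy as np
    from obp.utils import sample_action_fast, softmax

    # (n_rounds, n_actions) action-choice probabilities that sum to one per round
    probs = softmax(np.random.normal(size=(10, 5)))
    # one sampled action index per round, shape (n_rounds,)
    actions = sample_action_fast(probs)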