├── .github └── workflows │ ├── lints.yml │ └── tests.yml ├── .gitignore ├── .readthedocs.yml ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── README_JN.md ├── benchmark ├── README.md ├── cf_policy_search │ ├── README.md │ ├── conf │ │ └── hyperparams.yaml │ ├── custom_dataset.py │ └── run_cf_policy_search.py └── ope │ ├── README.md │ ├── benchmark_ope_estimators.py │ ├── benchmark_ope_estimators_hypara.py │ ├── conf │ ├── config.yaml │ ├── estimator_hyperparams │ │ └── default.yaml │ ├── reg_model_hyperparams │ │ └── default.yaml │ └── setting │ │ └── default.yaml │ ├── poetry.lock │ └── pyproject.toml ├── docs ├── Makefile ├── _static │ └── images │ │ ├── dataset.png │ │ ├── evaluation_of_ope_algo.png │ │ ├── logo.png │ │ ├── overview.png │ │ ├── recommended_fashion_items.png │ │ ├── related_data.png │ │ ├── related_packages.png │ │ └── statistics_of_obd.png ├── about.rst ├── conf.py ├── estimators.rst ├── evaluation_ope.rst ├── index.rst ├── installation.rst ├── make.bat ├── obp.rst ├── ope.rst ├── quickstart.rst ├── references.rst ├── refs.bib ├── related.rst └── requirements.txt ├── examples ├── README.md ├── multiclass │ ├── README.md │ ├── conf │ │ └── hyperparams.yaml │ └── evaluate_off_policy_estimators.py ├── obd │ ├── README.md │ ├── conf │ │ └── hyperparams.yaml │ └── evaluate_off_policy_estimators.py ├── opl │ ├── README.md │ ├── conf │ │ └── hyperparams.yaml │ └── evaluate_off_policy_learners.py ├── quickstart │ ├── README.md │ ├── multiclass.ipynb │ ├── obd.ipynb │ ├── online-bandit-vs-opl-simulation.ipynb │ ├── online-bandit-with-delay-simulation.ipynb │ ├── online-bandit-with-drift-simulation.ipynb │ ├── opl.ipynb │ ├── replay.ipynb │ ├── synthetic.ipynb │ └── synthetic_slate.ipynb ├── replay │ ├── README.md │ └── evaluate_off_policy_estimators.py └── synthetic │ ├── README.md │ ├── conf │ └── hyperparams.yaml │ └── evaluate_off_policy_estimators.py ├── images ├── dataset.png ├── logo.png ├── obd_stats.png ├── ope_results_example.png ├── overview.png └── recommended_fashion_items.png ├── obd ├── README.md ├── README_JN.md ├── bts │ ├── all │ │ ├── all.csv │ │ └── item_context.csv │ ├── men │ │ ├── item_context.csv │ │ └── men.csv │ └── women │ │ ├── item_context.csv │ │ └── women.csv └── random │ ├── all │ ├── all.csv │ └── item_context.csv │ ├── men │ ├── item_context.csv │ └── men.csv │ └── women │ ├── item_context.csv │ └── women.csv ├── obp ├── __init__.py ├── dataset │ ├── __init__.py │ ├── base.py │ ├── multiclass.py │ ├── obd │ │ ├── bts │ │ │ ├── all │ │ │ │ ├── all.csv │ │ │ │ └── item_context.csv │ │ │ ├── men │ │ │ │ ├── item_context.csv │ │ │ │ └── men.csv │ │ │ └── women │ │ │ │ ├── item_context.csv │ │ │ │ └── women.csv │ │ └── random │ │ │ ├── all │ │ │ ├── all.csv │ │ │ └── item_context.csv │ │ │ ├── men │ │ │ ├── item_context.csv │ │ │ └── men.csv │ │ │ └── women │ │ │ ├── item_context.csv │ │ │ └── women.csv │ ├── real.py │ ├── reward_type.py │ ├── synthetic.py │ ├── synthetic_continuous.py │ ├── synthetic_embed.py │ ├── synthetic_multi.py │ └── synthetic_slate.py ├── ope │ ├── __init__.py │ ├── classification_model.py │ ├── estimators.py │ ├── estimators_continuous.py │ ├── estimators_embed.py │ ├── estimators_multi.py │ ├── estimators_slate.py │ ├── estimators_tuning.py │ ├── helper.py │ ├── meta.py │ ├── meta_continuous.py │ ├── meta_multi.py │ ├── meta_slate.py │ ├── regression_model.py │ └── regression_model_slate.py ├── policy │ ├── __init__.py │ ├── base.py │ ├── conf │ │ └── prior_bts.yaml │ ├── contextfree.py │ ├── linear.py │ 
├── logistic.py │ ├── offline.py │ ├── offline_continuous.py │ └── policy_type.py ├── simulator │ ├── __init__.py │ ├── coefficient_drifter.py │ ├── delay_sampler.py │ ├── replay.py │ └── simulator.py ├── types.py ├── utils.py └── version.py ├── poetry.lock ├── pyproject.toml ├── setup.cfg ├── setup.py ├── slides ├── slides_EN.pdf └── slides_JN.pdf └── tests ├── dataset ├── test_multiclass.py ├── test_real.py ├── test_synthetic.py ├── test_synthetic_continuous.py ├── test_synthetic_embed.py ├── test_synthetic_multi.py ├── test_synthetic_slate.py └── test_synthetic_slate_functions.py ├── ope ├── conftest.py ├── hyperparams.yaml ├── hyperparams_slate.yaml ├── test_all_estimators.py ├── test_bipw_estimators.py ├── test_dm_estimators.py ├── test_dr_estimators.py ├── test_dr_estimators_continuous.py ├── test_dr_estimators_multi.py ├── test_dr_estimators_slate.py ├── test_importance_weight_estimator.py ├── test_ipw_estimators.py ├── test_ipw_estimators_continuous.py ├── test_ipw_estimators_embed.py ├── test_ipw_estimators_multi.py ├── test_ipw_estimators_slate.py ├── test_kernel_functions.py ├── test_meta.py ├── test_meta_continuous.py ├── test_meta_multi.py ├── test_meta_slate.py ├── test_offline_estimation_performance.py ├── test_propensity_score_estimator.py ├── test_regression_models.py └── test_regression_models_slate.py ├── policy ├── test_contextfree.py ├── test_linear.py ├── test_logistic.py ├── test_offline.py ├── test_offline_continuous.py ├── test_offline_learner_continuous_performance.py └── test_offline_learner_performance.py ├── simulator ├── test_coefficient_drifter.py ├── test_delay_sampler.py └── test_simulator.py └── test_utils.py /.github/workflows/lints.yml: -------------------------------------------------------------------------------- 1 | name: Lints 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: {} 8 | 9 | jobs: 10 | lints: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v2 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Black 23 | uses: psf/black@stable 24 | with: 25 | args: ". --check --diff" 26 | 27 | - name: flake8 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install flake8 31 | flake8 . 32 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: {} 8 | 9 | jobs: 10 | tests: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | python-version: [3.7, 3.8, 3.9] 16 | 17 | # Not intended for forks. 18 | if: github.repository == 'st-tech/zr-obp' 19 | 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v2 23 | 24 | - name: Setup Python${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | 29 | - name: Install 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install --progress-bar off -U setuptools 33 | 34 | # Install pytest 35 | pip install --progress-bar off . 
36 | 37 | pip install --progress-bar off pytest 38 | 39 | - name: Tests 40 | run: | 41 | pytest tests 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # additionals 132 | _autosummary/ 133 | __pycache__/ 134 | build/ 135 | dist/ 136 | *.egg-info/ 137 | .vscode/ 138 | .DS_Store 139 | logs/ 140 | open_bandit_dataset/ 141 | note.ipynb 142 | *.npy 143 | *.html 144 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Required 2 | version: 2 3 | 4 | # Build documentation in the docs/ directory with Sphinx 5 | sphinx: 6 | configuration: docs/conf.py 7 | 8 | # Optionally build your docs in additional formats such as PDF and ePub 9 | formats: all 10 | 11 | # Optionally set the version of Python and requirements required to build your docs 12 | python: 13 | version: 3.7 14 | install: 15 | - requirements: requirements.txt 16 | - requirements: docs/requirements.txt 17 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contribution Guidelines 2 | First off, thanks for your interest! 3 | 4 | We are doing our best to make this project even better. However, we recognize that there is ample room for improvement. 5 | We need your help to make this project even better. 6 | Let's make the best Off-Policy Evaluation software together! 7 | 8 | We prepare some conventions as follows: 9 | 10 | - [Coding Guidelines](#coding-guidelines) 11 | - [Tests](#tests) 12 | - [Continuous Integration](#continuous-integration) 13 | 14 | ## Coding Guidelines 15 | 16 | Code is formatted with [black](https://github.com/psf/black), 17 | and coding style is checked with [flake8](http://flake8.pycqa.org). 18 | 19 | After installing black, you can perform code formatting by the following command: 20 | 21 | ```bash 22 | # perform formatting recursively for the files under the current dir 23 | $ black . 24 | ``` 25 | 26 | After installing flake8, you can check the coding style by the following command: 27 | 28 | ```bash 29 | # perform checking of the coding style 30 | $ flake8 . 31 | ``` 32 | 33 | ## Tests 34 | 35 | We employ pytest as the testing framework. You can run all the tests as follows: 36 | 37 | ```bash 38 | # perform all the tests under the tests directory 39 | $ pytest . 40 | ``` 41 | 42 | ## Continuous Integration 43 | 44 | Open Bandit Pipeline uses Github Actions to perform continuous integration. 
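The CI workflows (`.github/workflows/lints.yml` and `.github/workflows/tests.yml`) run essentially the same checks described above. As a rough local equivalent (a sketch; the exact Python versions are pinned in the workflow files), you can reproduce them before opening a pull request:

```bash
# rough local equivalent of the CI checks in lints.yml and tests.yml
python -m pip install --upgrade pip
pip install black flake8 pytest
pip install .             # install obp itself
black . --check --diff    # formatting check
flake8 .                  # coding style check
pytest tests              # unit tests
```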
45 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.md 3 | recursive-include obp/policy/conf * 4 | recursive-include obp/dataset/obd * 5 | include LICENSE 6 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark Experiments 2 | --- 3 | This directory includes some benchmark experiments and demonstrations about off-policy evaluation using [the full size Open Bandit Dataset](https://research.zozo.com/data.html). The detailed description, results, and discussions can be found in [the relevant paper](https://arxiv.org/abs/2008.07146). 4 | 5 | - [`cf_policy_search`](./cf_policy_search): counterfactual policy search using OPE 6 | - [`ope`](./ope): estimation performance comparisons on a variety of OPE estimators 7 | -------------------------------------------------------------------------------- /benchmark/cf_policy_search/README.md: -------------------------------------------------------------------------------- 1 | # Counterfactual Policy Search 2 | 3 | ## Description 4 | 5 | ## Running Counterfactual Policy Search 6 | 7 | ``` 8 | for model in lightgbm 9 | do 10 | for context in 1 11 | do 12 | for camp in men 13 | do 14 | screen python run_cf_policy_search.py\ 15 | --context_set $context\ 16 | --base_model $model\ 17 | --behavior_policy bts\ 18 | --campaign $camp 19 | done 20 | done 21 | done 22 | ``` 23 | 24 | ``` 25 | python run_cf_policy_search.py --context_set 1 --base_model logistic_regression --campaign men --n_boot_samples 2 --test_size 0.9 26 | ``` 27 | 28 | ## Results 29 | -------------------------------------------------------------------------------- /benchmark/cf_policy_search/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | max_iter: 500 3 | learning_rate: 0.005 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 1000 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 500 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /benchmark/cf_policy_search/custom_dataset.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.decomposition import PCA 6 | 7 | from obp.dataset import OpenBanditDataset 8 | 9 | 10 | @dataclass 11 | class OBDWithInteractionFeatures(OpenBanditDataset): 12 | context_set: str = "1" 13 | 14 | def pre_process(self) -> None: 15 | 16 | if self.context_set == "1": 17 | super().pre_process() 18 | elif self.context_set == "2": 19 | self._pre_process_context_set_2() 20 | 21 | def _pre_process_context_set_1(self) -> None: 22 | """Create Context Set 1 (c.f., Section 5.2)""" 23 | 24 | user_cols = self.data.columns.str.contains("user_feature") 25 | self.context = pd.get_dummies( 26 | self.data.loc[:, user_cols], drop_first=True 27 | ).values 28 | 29 | def _pre_process_context_set_2(self) -> None: 30 | """Create Context Set 2 (c.f., Section 5.2)""" 31 | 32 | super().pre_process() 33 | affinity_cols = self.data.columns.str.contains("affinity") 34 | Xaffinity = self.data.loc[:, 
affinity_cols].values 35 | self.context = PCA(n_components=30).fit_transform( 36 | np.c_[self.context, Xaffinity] 37 | ) 38 | -------------------------------------------------------------------------------- /benchmark/cf_policy_search/run_cf_policy_search.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | from custom_dataset import OBDWithInteractionFeatures 5 | from joblib import delayed 6 | from joblib import Parallel 7 | import numpy as np 8 | from pandas import DataFrame 9 | from sklearn.ensemble import GradientBoostingClassifier 10 | from sklearn.ensemble import RandomForestClassifier 11 | from sklearn.linear_model import LogisticRegression 12 | import yaml 13 | 14 | from obp.ope import InverseProbabilityWeighting 15 | from obp.policy import IPWLearner 16 | 17 | 18 | # hyperparameters of the regression model used in model dependent OPE estimators 19 | with open("./conf/hyperparams.yaml", "rb") as f: 20 | hyperparams = yaml.safe_load(f) 21 | 22 | base_model_dict = dict( 23 | logistic_regression=LogisticRegression, 24 | lightgbm=GradientBoostingClassifier, 25 | random_forest=RandomForestClassifier, 26 | ) 27 | 28 | if __name__ == "__main__": 29 | parser = argparse.ArgumentParser(description="run evaluation policy selection.") 30 | parser.add_argument( 31 | "--n_runs", 32 | type=int, 33 | default=5, 34 | help="number of bootstrap sampling in the experiment.", 35 | ) 36 | parser.add_argument( 37 | "--context_set", 38 | type=str, 39 | choices=["1", "2"], 40 | required=True, 41 | help="context sets for contextual bandit policies.", 42 | ) 43 | parser.add_argument( 44 | "--base_model", 45 | type=str, 46 | choices=["logistic_regression", "lightgbm", "random_forest"], 47 | required=True, 48 | help="base model for a evaluation policy to be evaluated", 49 | ) 50 | parser.add_argument( 51 | "--behavior_policy", 52 | type=str, 53 | choices=["bts", "random"], 54 | default="random", 55 | help="behavior policy, bts or random.", 56 | ) 57 | parser.add_argument( 58 | "--campaign", 59 | type=str, 60 | choices=["all", "men", "women"], 61 | required=True, 62 | help="campaign name, men, women, or all.", 63 | ) 64 | parser.add_argument( 65 | "--test_size", 66 | type=float, 67 | default=0.5, 68 | help="the proportion of the dataset to include in the test split.", 69 | ) 70 | parser.add_argument( 71 | "--n_jobs", 72 | type=int, 73 | default=1, 74 | help="the maximum number of concurrently running jobs.", 75 | ) 76 | parser.add_argument("--random_state", type=int, default=12345) 77 | args = parser.parse_args() 78 | print(args) 79 | 80 | # configurations 81 | n_runs = args.n_runs 82 | context_set = args.context_set 83 | base_model = args.base_model 84 | behavior_policy = args.behavior_policy 85 | campaign = args.campaign 86 | test_size = args.test_size 87 | n_jobs = args.n_jobs 88 | random_state = args.random_state 89 | np.random.seed(random_state) 90 | data_path = Path("../open_bandit_dataset") 91 | 92 | # define a dataset class 93 | obd = OBDWithInteractionFeatures( 94 | behavior_policy=behavior_policy, 95 | campaign=campaign, 96 | data_path=data_path, 97 | context_set=context_set, 98 | ) 99 | # define a counterfactual policy based on IPWLearner 100 | counterfactual_policy = IPWLearner( 101 | base_model=base_model_dict[base_model](**hyperparams[base_model]), 102 | n_actions=obd.n_actions, 103 | len_list=obd.len_list, 104 | ) 105 | policy_name = f"{base_model}_{context_set}" 106 | 107 | # ground-truth policy value of the 
Bernoulli TS policy (the current best policy) in the test set 108 | # , which is the empirical mean of the factual (observed) rewards (on-policy estimation) 109 | ground_truth = obd.calc_on_policy_policy_value_estimate( 110 | behavior_policy="bts", 111 | campaign=campaign, 112 | data_path=data_path, 113 | test_size=test_size, 114 | is_timeseries_split=True, 115 | ) 116 | 117 | def process(b: int): 118 | # sample bootstrap from batch logged bandit feedback 119 | boot_bandit_feedback = obd.sample_bootstrap_bandit_feedback( 120 | test_size=test_size, is_timeseries_split=True, random_state=b 121 | ) 122 | # train an evaluation on the training set of the logged bandit feedback data 123 | action_dist = counterfactual_policy.fit( 124 | context=boot_bandit_feedback["context"], 125 | action=boot_bandit_feedback["action"], 126 | reward=boot_bandit_feedback["reward"], 127 | pscore=boot_bandit_feedback["pscore"], 128 | position=boot_bandit_feedback["position"], 129 | ) 130 | # make action selections (predictions) 131 | action_dist = counterfactual_policy.predict( 132 | context=boot_bandit_feedback["context_test"] 133 | ) 134 | # estimate the policy value of a given counterfactual algorithm by the three OPE estimators. 135 | ipw = InverseProbabilityWeighting() 136 | return ipw.estimate_policy_value( 137 | reward=boot_bandit_feedback["reward_test"], 138 | action=boot_bandit_feedback["action_test"], 139 | position=boot_bandit_feedback["position_test"], 140 | pscore=boot_bandit_feedback["pscore_test"], 141 | action_dist=action_dist, 142 | ) 143 | 144 | processed = Parallel( 145 | backend="multiprocessing", 146 | n_jobs=n_jobs, 147 | verbose=50, 148 | )([delayed(process)(i) for i in np.arange(n_runs)]) 149 | 150 | # save counterfactual policy evaluation results in `./logs` directory 151 | ope_results = np.zeros((n_runs, 2)) 152 | for b, estimated_policy_value_b in enumerate(processed): 153 | ope_results[b, 0] = estimated_policy_value_b 154 | ope_results[b, 1] = estimated_policy_value_b / ground_truth 155 | save_path = Path("./logs") / behavior_policy / campaign 156 | save_path.mkdir(exist_ok=True, parents=True) 157 | DataFrame( 158 | ope_results, columns=["policy_value", "relative_policy_value"] 159 | ).describe().round(6).to_csv(save_path / f"{policy_name}.csv") 160 | -------------------------------------------------------------------------------- /benchmark/ope/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking Off-Policy Evaluation 2 | 3 | This directory includes the code to replicate the benchmark experiment done in the following paper. 4 | 5 | Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita.
6 | **Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation**
7 | [https://arxiv.org/abs/2008.07146](https://arxiv.org/abs/2008.07146)
8 |
9 |
10 | If you find this code useful in your research, please cite:
11 | ```
12 | @article{saito2020open,
13 | title={Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation},
14 | author={Saito, Yuta and Shunsuke, Aihara and Megumi, Matsutani and Yusuke, Narita},
15 | journal={arXiv preprint arXiv:2008.07146},
16 | year={2020}
17 | }
18 | ```
19 |
20 | ## Description
21 | We use the (full size) Open Bandit Dataset to evaluate and compare OPE estimators in a *realistic* and *reproducible* manner. Specifically, we evaluate the estimation performance of a wide variety of OPE estimators by comparing the policy values estimated by OPE with the on-policy policy value of an evaluation policy.
22 |
23 | ### Dataset
24 | Please download the full [open bandit dataset](https://research.zozo.com/data.html) and put it in the `../open_bandit_dataset/` directory.
25 |
26 | ## Evaluating Off-Policy Estimators
27 |
28 | In the benchmark experiment, we evaluate the estimation performance of the following OPE estimators.
29 |
30 | - Direct Method (DM)
31 | - Inverse Probability Weighting (IPW)
32 | - Self-Normalized Inverse Probability Weighting (SNIPW)
33 | - Doubly Robust (DR)
34 | - Self-Normalized Doubly Robust (SNDR)
35 | - Switch Doubly Robust (Switch-DR)
36 | - Doubly Robust with Optimistic Shrinkage (DRos)
37 |
38 | See Section 2 and Appendix B of [our paper](https://arxiv.org/abs/2008.07146) or the package [documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details of these estimators.
39 |
40 | For Switch-DR and DRos, we use a data-driven hyperparameter tuning method described in [Su et al.](https://arxiv.org/abs/1907.09623).
41 | For estimators except for DM, we use the true action choice probabilities contained in the Open Bandit Dataset.
42 | For estimators except for IPW and SNIPW, we need to obtain a reward estimator.
43 | We do this by using machine learning models (such as gradient boosting) implemented in *scikit-learn*.
44 | We also use cross-fitting to avoid substantial bias from overfitting when obtaining a reward estimator.
45 |
46 | ## Requirements and Setup
47 |
48 | The Python environment is built using [poetry](https://github.com/python-poetry/poetry). You can build the same environment as in our benchmark experiment by cloning the repository and running `poetry install` directly under this folder (if you have not installed poetry yet, please run `pip install poetry` first).
49 |
50 | ```bash
51 | # clone the obp repository
52 | git clone https://github.com/st-tech/zr-obp.git
53 | cd zr-obp/benchmark/ope
54 |
55 | # build the environment with poetry
56 | poetry install
57 |
58 | # run the benchmark experiment
59 | poetry run python benchmark_ope_estimators.py ...
60 | ```
61 |
62 | The versions of Python and the packages used are as follows.
63 |
64 | ```
65 | [tool.poetry.dependencies]
66 | python = "^3.9,<3.10"
67 | scikit-learn = "^0.24.2"
68 | pandas = "^1.3.1"
69 | numpy = "^1.21.1"
70 | matplotlib = "^3.4.2"
71 | hydra-core = "^1.0.7"
72 | pingouin = "^0.4.0"
73 | pyieoe = "^0.1.0"
74 | obp = "^0.5.0"
75 | ```
76 |
77 | ## Files
78 |
79 | - [benchmark_ope_estimators.py](https://github.com/st-tech/zr-obp/blob/master/benchmark/ope/benchmark_ope_estimators.py) implements the experimental workflow to evaluate and compare the above OPE estimators using Open Bandit Dataset.
We summarize the detailed experimental protocol for evaluating OPE estimators using real-world data [here](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html).
80 | - [benchmark_ope_estimators_hypara.py](https://github.com/st-tech/zr-obp/blob/master/benchmark/ope/benchmark_ope_estimators_hypara.py) evaluates the effect of the hyperparameter choice on the OPE performance of DRos.
81 | - [./conf/](./conf/) specifies experimental settings such as the number of random seeds.
82 |
83 | ## Scripts
84 | The experimental workflow is implemented using [Hydra](https://github.com/facebookresearch/hydra). Below, we explain the important experimental configurations.
85 |
86 | ```bash
87 | # run evaluation of OPE experiments on the full open bandit dataset
88 | poetry run python benchmark_ope_estimators.py\
89 |   setting.n_seeds=$n_seeds\
90 |   setting.campaign=$campaign\
91 |   setting.behavior_policy=$behavior_policy\
92 |   setting.sample_size=$sample_size\
93 |   setting.reg_model=$reg_model\
94 |   setting.is_timeseries_split=$is_time_series_split
95 | ```
96 |
97 | - `$n_seeds` specifies the number of random seeds used in the experiment.
98 | - `$campaign` specifies the campaign considered in ZOZOTOWN and should be one of "all", "men", or "women".
99 | - `$behavior_policy` specifies which policy, Random or Bernoulli TS (bts), is used as the behavior policy and should be either "random" or "bts".
100 | - `$sample_size` specifies the number of samples contained in the logged bandit feedback used to conduct OPE.
101 | - `$reg_model` specifies the base ML model for defining the regression model and should be one of "logistic_regression", "random_forest", or "lightgbm".
102 | - `$is_timeseries_split` specifies whether the data is split by timestamp. If true, the out-sample performance of OPE is evaluated. See the relevant paper for details.
103 |
104 | Please see [`./conf/setting/default.yaml`](./conf/setting/default.yaml) for the default experimental configurations, which are used when they are not overridden.
105 |
106 | It is possible to run multiple experimental settings easily by using the `--multirun (-m)` option of Hydra.
107 | For example, the following script sweeps over all combinations of the three campaigns ('all', 'men', and 'women') and the two behavior policies ('random' and 'bts').
108 |
109 | ```bash
110 | poetry run python benchmark_ope_estimators.py setting.campaign=all,men,women setting.behavior_policy=random,bts --multirun
111 | ```
112 |
113 | The experimental results (including the pairwise hypothesis test results) will be stored in the `logs/` directory.
114 | Our benchmark results and findings can be found in Section 5 of [our paper](https://arxiv.org/abs/2008.07146).
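As a further illustration of the Hydra overrides described above, an out-sample evaluation run would look like the following sketch (the field names follow [`./conf/setting/default.yaml`](./conf/setting/default.yaml); the specific values are only for illustration):

```bash
# evaluate the out-sample performance of OPE on the "all" campaign
poetry run python benchmark_ope_estimators.py\
  setting.campaign=all\
  setting.behavior_policy=bts\
  setting.is_timeseries_split=True\
  setting.test_size=0.3
```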
115 | -------------------------------------------------------------------------------- /benchmark/ope/conf/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - estimator_hyperparams: default 3 | - reg_model_hyperparams: default 4 | - setting: default 5 | hydra: 6 | run: 7 | dir: ./logs/is_timeseries_split=${setting.is_timeseries_split}/behavior_policy=${setting.behavior_policy}/campaign=${setting.campaign}/reg_model=${setting.reg_model}/sample_size=${setting.sample_size} 8 | sweep: 9 | dir: ./logs/is_timeseries_split=${setting.is_timeseries_split}/behavior_policy=${setting.behavior_policy}/campaign=${setting.campaign}/reg_model=${setting.reg_model}/sample_size=${setting.sample_size} 10 | subdir: ./ 11 | -------------------------------------------------------------------------------- /benchmark/ope/conf/estimator_hyperparams/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | # a set of candidate hyperparams 3 | lambdas: 4 | - 1 5 | - 5 6 | - 10 7 | - 50 8 | - 100 9 | - 500 10 | - 1000 11 | - 5000 12 | - 10000 13 | -------------------------------------------------------------------------------- /benchmark/ope/conf/reg_model_hyperparams/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | lightgbm: 3 | max_iter: 100 4 | learning_rate: 0.01 5 | max_depth: 5 6 | min_samples_leaf: 10 7 | random_state: 12345 8 | logistic_regression: 9 | max_iter: 10000 10 | C: 1000 11 | random_state: 12345 12 | random_forest: 13 | n_estimators: 100 14 | max_depth: 5 15 | min_samples_leaf: 10 16 | random_state: 12345 17 | -------------------------------------------------------------------------------- /benchmark/ope/conf/setting/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | experiment: default 3 | # dataset settings 4 | n_seeds: 200 5 | sample_size: 10000 6 | campaign: men 7 | behavior_policy: random 8 | # regression model settings 9 | reg_model: lightgbm 10 | n_folds: 2 # cross-fitting 11 | # other experimental settings 12 | is_full_obd: True 13 | is_timeseries_split: False 14 | test_size: 0.3 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /benchmark/ope/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "benchmark/ope" 3 | version = "0.1.0" 4 | description = "benchmarking OPE estimators on Open Bandit Dataset" 5 | authors = ["usaito "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.9,<3.10" 9 | scikit-learn = "^0.24.2" 10 | pandas = "^1.3.1" 11 | numpy = "^1.21.1" 12 | matplotlib = "^3.4.2" 13 | pingouin = "^0.4.0" 14 | pyieoe = "^0.1.0" 15 | obp = "^0.5.0" 16 | hydra-core = "1.0.7" 17 | 18 | [tool.poetry.dev-dependencies] 19 | flake8 = "^3.9.2" 20 | black = "^21.7b0" 21 | 22 | [build-system] 23 | requires = ["poetry-core>=1.0.0"] 24 | build-backend = "poetry.core.masonry.api" 25 | -------------------------------------------------------------------------------- /docs/_static/images/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/dataset.png -------------------------------------------------------------------------------- 
/docs/_static/images/evaluation_of_ope_algo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/evaluation_of_ope_algo.png -------------------------------------------------------------------------------- /docs/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/logo.png -------------------------------------------------------------------------------- /docs/_static/images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/overview.png -------------------------------------------------------------------------------- /docs/_static/images/recommended_fashion_items.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/recommended_fashion_items.png -------------------------------------------------------------------------------- /docs/_static/images/related_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/related_data.png -------------------------------------------------------------------------------- /docs/_static/images/related_packages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/related_packages.png -------------------------------------------------------------------------------- /docs/_static/images/statistics_of_obd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/docs/_static/images/statistics_of_obd.png -------------------------------------------------------------------------------- /docs/about.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | About 3 | =============== 4 | Motivated by the paucity of real-world data and implementation enabling the evaluation and comparison of OPE, we release the following open-source dataset and pipeline software for research uses. 5 | 6 | 7 | Open Bandit Dataset (OBD) 8 | ------------------------------ 9 | 10 | *Open Bandit Dataset* is a public real-world logged bandit feedback data. 11 | The dataset is provided by `ZOZO, Inc. `_, the largest Japanese fashion e-commerce company with over 5 billion USD market capitalization (as of May 2020). 12 | The company uses multi-armed bandit algorithms to recommend fashion items to users in a large-scale fashion e-commerce platform called `ZOZOTOWN `_. 13 | The following figure presents examples of displayed fashion items as actions. 14 | 15 | .. image:: ./_static/images/recommended_fashion_items.png 16 | :scale: 25% 17 | :align: center 18 | 19 | We collected the data in a 7-days experiment in late November 2019 on three campaigns, corresponding to "all", "men's", and "women's" items, respectively. 
20 | Each campaign randomly uses either the Random policy or the Bernoulli Thompson Sampling (Bernoulli TS) policy for each user impression. 21 | Note that we pre-trained Bernoulli TS for over a month before the data collection process and the policy well converges to a fixed one. 22 | Thus, we suppose our data is generated by a fixed policy and apply the standard OPE formulation that assumes static behavior and evaluation policies. 23 | These policies select three of the possible fashion items to each user. 24 | Let :math:`\mathcal{I}:=\{0,\ldots,n\}` be a set of :math:`n+1` items and :math:`\mathcal{K}:=\{0,\ldots,k\}` be a set of :math:`k+1` positions. 25 | The above figure shows that :math:`k+1=3` for our data. 26 | We assume that the reward (click indicator) depends only on the item and its position, which is a general assumption on the click generative model in the web industry:cite:`Li2018`. 27 | Under the assumption, the action space is simply the product of the item set and the position set, i.e., :math:`\calA = \mathcal{I} \times \mathcal{K}`. 28 | Then, we can apply the standard OPE setup and estimators to our setting. 29 | We describe some statistics of the dataset in the following. 30 | 31 | .. image:: ./_static/images/statistics_of_obd.png 32 | :scale: 25% 33 | :align: center 34 | 35 | The data is large and contains many millions of recommendation instances. 36 | It also includes the true action choice probabilities by behavior policies computed by Monte Carlo simulations based on the policy parameters (e.g., parameters of the beta distribution used by Bernoulli TS) used during the data collection process. 37 | The number of actions is also sizable, so this setting is challenging for bandit algorithms and their OPE. 38 | We share the full version of our data at https://research.zozo.com/data.html 39 | 40 | Open Bandit Pipeline (OBP) 41 | --------------------------------- 42 | 43 | *Open Bandit Pipeline* is a series of implementations of dataset preprocessing, policy learning, and evaluation of OPE estimators. 44 | This pipeline allows researchers to focus on building their bandit algorithm or OPE estimator and easily compare them with others’ methods in realistic and reproducible ways. 45 | Thus, it facilitates reproducible research on bandit algorithms and off-policy evaluation. 46 | 47 | .. image:: ./_static/images/overview.png 48 | :scale: 40% 49 | :align: center 50 | 51 | Open Bandit Pipeline consists of the following main modules. 52 | 53 | - **dataset module**: This module provides a data loader for Open Bandit Dataset and a flexible interface for handling logged bandit feedback. It also provides tools to generate synthetic bandit datasets. 54 | - **policy module**: This module provides interfaces for online and offline bandit algorithms. It also implements several standard algorithms. 55 | - **simulator module**: This module provides functions for conducting offline bandit simulation. 56 | - **ope module**: This module provides interfaces for OPE estimators. It also implements several standard OPE estimators. 57 | 58 | In addition to the above algorithms and estimators, the pipeline also provides flexible interfaces. 59 | Therefore, researchers can easily implement their own algorithms or estimators and evaluate them with our data and pipeline. 60 | Moreover, the pipeline provides an interface for handling logged bandit feedback datasets. 
61 | Thus, practitioners can combine their own datasets with the pipeline and easily evaluate bandit algorithms' performances in their settings. 62 | 63 | Please see `package reference `_ for detailed information about Open Bandit Pipeline. 64 | 65 | To our knowledge, our real-world dataset and pipeline are the first to include logged bandit datasets collected by running *multiple* different policies, policy implementations used in production, and their ground-truth policy values. 66 | These features enable the **evaluation of OPE** for the first time. 67 | -------------------------------------------------------------------------------- /docs/estimators.rst: -------------------------------------------------------------------------------- 1 | ================================================ 2 | Estimators 3 | ================================================ 4 | 5 | 6 | Direct Method (DM) 7 | -------------------------------------- 8 | A widely-used method, DM, first learns a supervised machine learning model, such as random forest, ridge regression, and gradient boosting, to estimate the mean reward function. 9 | DM then uses it to estimate the policy value as 10 | 11 | .. math:: 12 | \hat{V}_{\mathrm{DM}} (\pi_e; \calD, \hat{q}) := \E_{\calD} [ \hat{q} (x_t, \pi_e) ], 13 | 14 | where :math:`\hat{q}(a \mid x)` is the estimated reward function. 15 | If :math:`\hat{q}(a \mid x)` is a good approximation to the mean reward function, this estimator accurately estimates the policy value of the evaluation policy :math:`V^{\pi}`. 16 | If :math:`\hat{q}(a \mid x)` fails to approximate the mean reward function well, however, the final estimator is no longer consistent. 17 | The model misspecification issue is problematic because the extent of misspecification cannot be easily quantified from data :cite:`Farajtabar2018`. 18 | 19 | 20 | Inverse Probability Weighting (IPW) 21 | -------------------------------------- 22 | To alleviate the issue with DM, researchers often use another estimator called IPW :cite:`Precup2000` :cite:`Strehl2010`. 23 | IPW re-weights the rewards by the ratio of the evaluation policy and behavior policy as 24 | 25 | .. math:: 26 | \hat{V}_{\mathrm{IPW}} (\pi_e; \calD) := \E_{\calD} [w(x_t,a_t) r_t ], 27 | 28 | where :math:`w(x,a) := \pi_e(a \mid x) / \pi_b(a \mid x)` is the importance weight given :math:`x` and :math:`a`. 29 | When the behavior policy is known, the IPW estimator is unbiased and consistent for the policy value. 30 | However, it can have a large variance, especially when the evaluation policy significantly deviates from the behavior policy. 31 | 32 | 33 | Doubly Robust (DR) 34 | -------------------------------------- 35 | The final approach is DR :cite:`Dudik2014`, which combines the above two estimators as 36 | 37 | .. math:: 38 | \hat{V}_{\mathrm{DR}} := \E_{\calD} [ \hat{q} (x_t, \pi_e) + w(x_t,a_t) (r_t-\hat{q}(x_t, a_t) ) ]. 39 | 40 | DR mimics IPW to use a weighted version of rewards, but DR also uses the estimated mean reward function as a control variate to decrease the variance. 41 | It preserves the consistency of IPW if either the importance weight or the mean reward estimator is accurate (a property called *double robustness*). 42 | Moreover, DR is *semiparametric efficient* :cite:`Narita2019` when the mean reward estimator is correctly specified. 43 | On the other hand, when it is wrong, this estimator can have larger asymptotic mean-squared-error than IPW :cite:`Kallus2019` and perform poorly in practice :cite:`Kang2007`. 
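The three estimators above can be written directly from their definitions. The following NumPy sketch is a simplified illustration (a single position, a known behavior policy, and a precomputed reward model :math:`\hat{q}`), not the implementation provided in ``obp.ope``:

.. code-block:: python

    import numpy as np

    def dm_ipw_dr(reward, action, pscore, action_dist, q_hat):
        """Return the DM, IPW, and DR estimates of the policy value.

        reward:      (n, )   observed rewards r_t
        action:      (n, )   logged actions a_t
        pscore:      (n, )   behavior policy probabilities pi_b(a_t | x_t)
        action_dist: (n, m)  evaluation policy pi_e(a | x_t) for all m actions
        q_hat:       (n, m)  estimated mean rewards q_hat(x_t, a)
        """
        idx = np.arange(reward.shape[0])
        # importance weights w(x_t, a_t) = pi_e(a_t | x_t) / pi_b(a_t | x_t)
        iw = action_dist[idx, action] / pscore
        # q_hat(x_t, pi_e) = sum_a pi_e(a | x_t) * q_hat(x_t, a)
        q_pi_e = (action_dist * q_hat).sum(axis=1)
        dm = q_pi_e.mean()
        ipw = (iw * reward).mean()
        dr = (q_pi_e + iw * (reward - q_hat[idx, action])).mean()
        return dm, ipw, dr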
44 | 45 | 46 | 47 | Self-Normalized Estimators 48 | -------------------------------------- 49 | Self-Normalized Inverse Probability Weighting (SNIPW) is an approach to address the variance issue with the original IPW. 50 | It estimates the policy value by dividing the sum of weighted rewards by the sum of importance weights as: 51 | 52 | .. math:: 53 | \hat{V}_{\mathrm{SNIPW}} (\pi_e; \calD) :=\frac{\E_{\calD} [ w(x_t,a_t) r_t ]}{\E_{\calD} [ w(x_t,a_t) ]}. 54 | 55 | SNIPW is more stable than IPW, because estimated policy value by SNIPW is bounded in the support of rewards and its conditional variance given action and context is bounded by the conditional variance of the rewards:cite:`kallus2019`. 56 | IPW does not have these properties. 57 | We can define Self-Normalized Doubly Robust (SNDR) in a similar manner as follows. 58 | 59 | .. math:: 60 | \hat{V}_{\mathrm{SNDR}} (\pi_e; \calD) := \E_{\calD} \left[\hat{q}(x_t, \pi_e) + \frac{w(x_t,a_t) (r_t-\hat{q}(x_t, a_t) )}{\E_{\calD} [ w(x_t,a_t) ]} \right]. 61 | 62 | 63 | Switch Estimators 64 | -------------------------------------- 65 | The DR estimator can still be subject to the variance issue, particularly when the importance weights are large due to low overlap. 66 | Switch-DR aims to reduce the effect of the variance issue by using DM where importance weights are large as: 67 | 68 | .. math:: 69 | \hat{V}_{\mathrm{SwitchDR}} (\pi_e; \calD, \hat{q}, \tau) := \E_{\calD} \left[ \hat{q}(x_t, \pi_e) + w(x_t,a_t) (r_t-\hat{q}(x_t, a_t) ) \mathbb{I}\{ w(x_t,a_t) \le \tau \} \right], 70 | 71 | where :math:`\mathbb{I} \{\cdot\}` is the indicator function and :math:`\tau \ge 0` is a hyperparameter. 72 | Switch-DR interpolates between DM and DR. 73 | When :math:`\tau=0`, it coincides with DM, while :math:`\tau \to \infty` yields DR. 74 | This estimator is minimax optimal when :math:`\tau` is appropriately chosen :cite:`Wang2016`. 75 | 76 | 77 | More Robust Doubly Robust (MRDR) 78 | -------------------------------------- 79 | MRDR uses a specialized reward estimator (:math:`\hat{q}_{\mathrm{MRDR}}`) that minimizes the variance of the resulting policy value estimator:cite:`Farajtabar2018`. 80 | This estimator estimates the policy value as: 81 | 82 | .. math:: 83 | \hat{V}_{\mathrm{MRDR}} (\pi_e; \calD, \hat{q}_{\mathrm{MRDR}}) := \hat{V}_{\mathrm{DR}} (\pi_e; \calD, \hat{q}_{\mathrm{MRDR}}), 84 | 85 | where :math:`\mathcal{Q}` is a function class for the reward estimator. 86 | When :math:`\mathcal{Q}` is well-specified, then :math:`\hat{q}_{\mathrm{MRDR}} = q`. 87 | Here, even if :math:`\mathcal{Q}` is misspecified, the derived reward estimator is expected to behave well since the target function is the resulting variance. 88 | 89 | 90 | Doubly Robust with Optimistic Shrinkage (DRos) 91 | ---------------------------------------------------------------------------- 92 | :cite:`Su2019` proposes DRs based on a new weight function :math:`w_o: \calX \times \calA \rightarrow \mathbb{R}_{+}` that directly minimizes sharp bounds on the MSE of the resulting estimator. 93 | DRs is defined as 94 | 95 | .. math:: 96 | \hat{V}_{\mathrm{DRs}} (\pi_e; \calD, \hat{q}, \lambda) := \E_{\calD} [ \hat{q} (x_t, \pi_e) + w_o (x_t, a_t; \lambda) (r_t-\hat{q}(x_t, a_t) ) ], 97 | 98 | where :math:`\lambda \ge 0` is a hyperparameter and the new weight is 99 | 100 | .. math:: 101 | w_o (x, a; \lambda) := \frac{\lambda}{w^{2}(x, a)+\lambda} w(x, a). 102 | 103 | When :math:`\lambda = 0`, :math:`w_o (x, a; \lambda) = 0` leading to the standard DM. 
104 | On the other hand, as :math:`\lambda \rightarrow \infty`, :math:`w_o (x, a; \lambda) = w(x,a)` leading to the original DR. 105 | -------------------------------------------------------------------------------- /docs/evaluation_ope.rst: -------------------------------------------------------------------------------- 1 | ================================================ 2 | Evaluation of OPE 3 | ================================================ 4 | Here we describe an experimental protocol to evaluate OPE estimators and use it to compare a wide variety of existing estimators. 5 | 6 | We can empirically evaluate OPE estimators' performances by using two sources of logged bandit feedback collected by two different policies :math:`\pi^{(he)}` (hypothetical evaluation policy) and :math:`\pi^{(hb)}` (hypothetical behavior policy). 7 | We denote log data generated by :math:`\pi^{(he)}` and :math:`\pi^{(hb)}` as :math:`\calD^{(he)} := \{ (x^{(he)}_t, a^{(he)}_t, r^{(he)}_t) \}_{t=1}^T` and :math:`\calD^{(hb)} := \{ (x^{(hb)}_t, a^{(hb)}_t, r^{(hb)}_t) \}_{t=1}^T`, respectively. 8 | By applying the following protocol to several different OPE estimators, we can compare their estimation performances: 9 | 10 | 11 | 1. Define the evaluation and test sets as: 12 | 13 | * in-sample case: :math:`\calD_{\mathrm{ev}} := \calD^{(hb)}_{1:T}`, :math:`\calD_{\mathrm{te}} := \calD^{(he)}_{1:T}` 14 | * out-sample case: :math:`\calD_{\mathrm{ev}} := \calD^{(hb)}_{1:\tilde{t}}`, :math:`\calD_{\mathrm{te}} := \calD^{(he)}_{\tilde{t}+1:T}` 15 | 16 | where :math:`\calD_{a:b} := \{ (x_t,a_t,r_t) \}_{t=a}^{b}`. 17 | 18 | 2. Estimate the policy value of :math:`\pi^{(he)}` using :math:`\calD_{\mathrm{ev}}` by an estimator :math:`\hat{V}`. We can represent an estimated policy value by :math:`\hat{V}` as :math:`\hat{V} (\pi^{(he)}; \calD_{\mathrm{ev}})`. 19 | 20 | 3. Estimate :math:`V(\pi^{(he)})` by the *on-policy estimation* and regard it as the ground-truth as 21 | 22 | .. math:: 23 | V_{\mathrm{on}} (\pi^{(he)}; \calD_{\mathrm{te}}) := \E_{\calD_{\mathrm{te}}} [r^{(he)}_t]. 24 | 25 | 4. Compare the off-policy estimate :math:`\hat{V}(\pi^{(he)}; \calD_{\mathrm{ev}})` with its ground-truth :math:`V_{\mathrm{on}} (\pi^{(he)}; \calD_{\mathrm{te}})`. We can evaluate the estimation accuracy of :math:`\hat{V}` by the following *relative estimation error* (relative-EE): 26 | 27 | .. math:: 28 | \textit{relative-EE} (\hat{V}; \calD_{\mathrm{ev}}) := \left| \frac{\hat{V} (\pi^{(he)}; \calD_{\mathrm{ev}}) - V_{\mathrm{on}} (\pi^{(he)}; \calD_{\mathrm{te}}) }{V_{\mathrm{on}} (\pi^{(he)}; \calD_{\mathrm{te}})} \right|. 29 | 30 | 5. To estimate standard deviation of relative-EE, repeat the above process several times with different bootstrap samples of the logged bandit data created by sampling data *with replacement* from :math:`\calD_{\mathrm{ev}}`. 31 | 32 | We call the problem setting **without** the sample splitting by time series as in-sample case. 33 | In contrast, we call that **with** the sample splitting as out-sample case where OPE estimators aim to estimate the policy value of an evaluation policy in the test data. 34 | 35 | The following algorithm describes the detailed experimental protocol to evaluate OPE estimators. 36 | 37 | .. image:: ./_static/images/evaluation_of_ope_algo.png 38 | :scale: 45% 39 | :align: center 40 | 41 | 42 | Using the above protocol, our real-world data, and pipeline, we have performed extensive benchmark experiments on a variety of existing off-policy estimators. 
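As a concrete reference for steps 4 and 5 of the protocol, relative-EE and its bootstrap mean and standard deviation reduce to the following computation (a sketch with made-up numbers for illustration; this is not code from the benchmark scripts):

.. code-block:: python

    import numpy as np

    def relative_ee(v_hat, v_on):
        """Relative estimation error of an off-policy estimate v_hat
        against the on-policy ground-truth v_on (step 4)."""
        return np.abs((v_hat - v_on) / v_on)

    # OPE estimates obtained from different bootstrap samples of D_ev (step 5)
    v_hat_boot = np.array([0.0041, 0.0048, 0.0044])  # made-up estimates
    v_on = 0.0045                                    # made-up on-policy ground truth
    errors = relative_ee(v_hat_boot, v_on)
    print(errors.mean(), errors.std())  # mean and std of relative-EE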
43 | The experimental results and the relevant discussion can be found in `our paper `_. 44 | The code for running the benchmark experiments can be found at `zr-obp/benchmark/ope `_. 45 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. obp documentation master file, created by 2 | sphinx-quickstart on Tue Jun 23 17:55:21 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. image:: ./_static/images/logo.png 7 | :scale: 20% 8 | :align: center 9 | 10 | Open Bandit Pipeline; a python library for bandit algorithms and off-policy evaluation 11 | ========================================================================================= 12 | 13 | Overview 14 | ~~~~~~~~~~~~ 15 | 16 | *Open Bandit Pipeline (OBP)* is an open source python library for bandit algorithms and off-policy evaluation (OPE). 17 | The toolkit comes with the *Open Bandit Dataset* , a large-scale logged bandit feedback data collected on a fashion e-commerce platform, `ZOZOTOWN `_. 18 | The purpose of the open data and library is to enable easy, realistic, and reproducible evaluation of bandit algorithms and OPE. 19 | OBP has a series of implementations of dataset preprocessing, bandit policy interfaces, and a variety of OPE estimators. 20 | 21 | Our open data and pipeline facilitate evaluation and comparison related to the following research topics. 22 | 23 | * **Bandit Algorithms**: Our data include the probabilities of each action being selected by behavior policies (the true propensity scores). 24 | Therefore, it enables the evaluation of new online bandit algorithms, including contextual and combinatorial algorithms, in a large real-world setting. 25 | 26 | 27 | * **Off-Policy Evaluation**: We present implementations of behavior policies used when collecting datasets as a part of our pipeline. 28 | Our open data also contains logged bandit feedback data generated by multiple behavior policies. 29 | Therefore, it enables the evaluation of off-policy evaluation with ground-truths for the performances of evaluation policies. 30 | 31 | This website contains pages with example analyses to help demonstrate the usage of this library. 32 | Additionally, it presents examples of evaluating counterfactual bandit algorithms and OPE itself. 33 | The reference page contains the full reference documentation for the current functions of this toolkit. 
34 | 35 | Algorithms and OPE Estimators Supported 36 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 37 | 38 | Bandit Algorithms 39 | ---------------------- 40 | 41 | * Online 42 | 43 | * Context-free 44 | 45 | * Random 46 | * Epsilon Greedy 47 | * Bernoulli Thompson Sampling 48 | 49 | * Contextual (Linear) 50 | 51 | * Linear Epsilon Greedy 52 | * Linear Thompson Sampling :cite:`Agrawal2013` 53 | * Linear Upper Confidence Bound :cite:`Li2010` 54 | 55 | * Contextual (Logistic) 56 | 57 | * Logistic Epsilon Greedy 58 | * Logistic Thompson Sampling :cite:`Chapelle2011` 59 | * Logistic Upper Confidence Bound :cite:`Mahajan2012` 60 | 61 | * Offline (Off-Policy Learning) :cite:`Dudik2014` 62 | 63 | * Inverse Probability Weighting 64 | 65 | OPE Estimators 66 | ---------------------- 67 | * Replay Method (RM) :cite:`Li2012` 68 | * Direct Method (DM) :cite:`Beygelzimer2009` 69 | * Inverse Probability Weighting (IPW) :cite:`Precup2000` :cite:`Strehl2010` 70 | * Self-Normalized Inverse Probability Weighting (SNIPW) :cite:`Swaminathan2015b` 71 | * Doubly Robust (DR) :cite:`Dudik2014` 72 | * Switch Estimators :cite:`Wang2016` 73 | * Doubly Robust with Optimistic Shrinkage (DRos) :cite:`Su2019` 74 | * More Robust Doubly Robust (MRDR) :cite:`Farajtabar2018` 75 | * Double Machine Learning (DML) :cite:`Narita2020` 76 | 77 | 78 | Citation 79 | ~~~~~~~~~~~~ 80 | If you use our dataset and pipeline in your work, please cite our paper below. 81 | 82 | ``` 83 | @article{saito2020open, 84 | title={Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation}, 85 | author={Saito, Yuta and Shunsuke, Aihara and Megumi, Matsutani and Yusuke, Narita}, 86 | journal={arXiv preprint arXiv:2008.07146}, 87 | year={2020} 88 | } 89 | ``` 90 | 91 | Google Group 92 | ~~~~~~~~~~~~~~~~ 93 | If you are interested in the Open Bandit Project, we can follow the updates at its google group: https://groups.google.com/g/open-bandit-project 94 | 95 | Contact 96 | ~~~~~~~~~~~~ 97 | For any question about the paper, data, and pipeline, feel free to contact: saito@hanjuku-kaso.com 98 | 99 | Contribution 100 | ~~~~~~~~~~~~~~ 101 | Any contributions to the Open Bandit Pipeline are more than welcome! 102 | Please refer to `CONTRIBUTING.md `_ for general guidelines how to contribute to the project. 103 | 104 | 105 | Table of Contents 106 | ~~~~~~~~~~~~~~~~~~~~~~~~ 107 | 108 | .. toctree:: 109 | :maxdepth: 3 110 | :caption: Introduction: 111 | 112 | about 113 | related 114 | 115 | .. toctree:: 116 | :maxdepth: 3 117 | :caption: Off-Policy Evaluation (OPE): 118 | 119 | ope 120 | estimators 121 | evaluation_ope 122 | 123 | .. toctree:: 124 | :maxdepth: 3 125 | :caption: Getting Started: 126 | 127 | installation 128 | quickstart 129 | 130 | .. toctree:: 131 | :maxdepth: 3 132 | :caption: Package Reference: 133 | 134 | obp 135 | 136 | .. toctree:: 137 | :caption: Others: 138 | 139 | Github 140 | Dataset 141 | Google Group 142 | LICENSE 143 | references 144 | 145 | 146 | 147 | Indices and tables 148 | ================== 149 | 150 | * :ref:`genindex` 151 | * :ref:`modindex` 152 | * :ref:`search` 153 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | 2 | ============ 3 | Installation 4 | ============ 5 | 6 | 7 | ``obp`` is available on PyPI, and can be installed from ``pip`` or source as follows: 8 | 9 | From ``pip``: 10 | 11 | .. 
code-block:: bash 12 | 13 | pip install obp 14 | 15 | From source: 16 | 17 | .. code-block:: bash 18 | 19 | git clone https://github.com/st-tech/zr-obp 20 | cd zr-obp 21 | python setup.py install 22 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/obp.rst: -------------------------------------------------------------------------------- 1 | OBP Package Reference 2 | ======================= 3 | 4 | ope module 5 | ------------ 6 | .. autosummary:: 7 | :toctree: _autosummary 8 | 9 | obp.ope.estimators 10 | obp.ope.meta 11 | obp.ope.regression_model 12 | 13 | 14 | policy module 15 | --------------- 16 | .. autosummary:: 17 | :toctree: _autosummary 18 | 19 | obp.policy.base 20 | obp.policy.contextfree 21 | obp.policy.linear 22 | obp.policy.logistic 23 | obp.policy.offline 24 | 25 | 26 | dataset module 27 | --------------- 28 | .. autosummary:: 29 | :toctree: _autosummary 30 | 31 | obp.dataset.base 32 | obp.dataset.real 33 | obp.dataset.synthetic 34 | obp.dataset.multiclass 35 | 36 | 37 | simulator module 38 | ------------------ 39 | .. autosummary:: 40 | :toctree: _autosummary 41 | 42 | obp.simulator.simulator 43 | 44 | 45 | others 46 | --------------- 47 | .. autosummary:: 48 | :toctree: _autosummary 49 | 50 | obp.utils 51 | -------------------------------------------------------------------------------- /docs/ope.rst: -------------------------------------------------------------------------------- 1 | ================================================ 2 | Overview 3 | ================================================ 4 | 5 | 6 | Setup 7 | ------ 8 | 9 | We consider a general contextual bandit setting. 10 | Let :math:`r \in [0, R_{\mathrm{max}}]` denote a reward or outcome variable (e.g., whether a fashion item as an action results in a click). 11 | We let :math:`x \in \calX` be a context vector (e.g., the user's demographic profile) that the decision maker observes when picking an action. 12 | Rewards and contexts are sampled from the unknown probability distributions :math:`p (r \mid x, a)` and :math:`p(x)`, respectively. 13 | Let :math:`\calA:=\{0,\ldots,m\}` be a finite set of :math:`m+1` actions. 14 | We call a function :math:`\pi: \calX \rightarrow \Delta(\calA)` a *policy*. 15 | It maps each context :math:`x \in \calX` into a distribution over actions, where :math:`\pi (a \mid x)` is the probability of taking action :math:`a` given :math:`x`. 
16 | 17 | Let :math:`\calD := \{(x_t,a_t,r_t)\}_{t=1}^{T}` be historical logged bandit feedback with :math:`T` rounds of observations. 18 | :math:`a_t` is a discrete variable indicating which action in :math:`\calA` is chosen in round :math:`t`. 19 | :math:`r_t` and :math:`x_t` denote the reward and the context observed in round :math:`t`, respectively. 20 | We assume that the logged bandit feedback is generated by a behavior policy :math:`\pi_b` as follows: 21 | 22 | .. math:: 23 | \{(x_t,a_t,r_t)\}_{t=1}^{T} \sim \prod_{t=1}^{T} p(x_t) \pi_b (a_t \mid x_t) p(r_t \mid x_t, a_t), 24 | 25 | where each context-action-reward triplet is sampled independently from the product distribution. 26 | Note that we assume :math:`a_t` is independent of :math:`r_t` conditional on :math:`x_t`. 27 | 28 | We let :math:`\pi(x,a,r) := p(x) \pi (a \mid x) p(r \mid x, a)` be the product distribution induced by a policy :math:`\pi`. 29 | For a function :math:`f(x,a,r)`, we use :math:`\E_{\calD} [f] := |\calD|^{-1} \sum_{(x_t, a_t, r_t) \in \calD} f(x_t, a_t, r_t)` to denote its empirical expectation over :math:`T` observations in :math:`\calD`. 30 | Then, for a function :math:`g(x,a)`, we let :math:`g(x,\pi) := \E_{a \sim \pi(a|x)}[g(x,a) \mid x]`. 31 | We also use :math:`q(x,a) := \E_{r \sim p(r|x,a)} [ r \mid x, a ]` to denote the mean reward function. 32 | 33 | 34 | Estimation Target 35 | ------------------------- 36 | We are interested in using the historical logged bandit data to estimate the following *policy value* of any given *evaluation policy* :math:`\pi_e`, which might be different from :math:`\pi_b`: 37 | 38 | .. math:: 39 | V (\pi_e) := \E_{(x,a,r) \sim \pi_e (x,a,r)} [r], 40 | 41 | where the expectation is taken over the product distribution induced by the evaluation policy, i.e., :math:`\pi_e (x,a,r) := p(x) \pi_e (a \mid x) p(r \mid x, a)`. 42 | We allow the evaluation policy :math:`\pi_e` to be degenerate, i.e., it may choose a particular action with probability 1. 43 | Estimating :math:`V(\pi_e)` before implementing :math:`\pi_e` in an online environment is valuable because :math:`\pi_e` may perform poorly and damage user satisfaction. 44 | Additionally, it is possible to select the evaluation policy that maximizes the policy value by comparing the estimated performances of candidate policies, without incurring additional implementation cost. 45 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Quickstart 3 | ============ 4 | 5 | We show an example of conducting offline evaluation of the performance of Bernoulli Thompson Sampling (BernoulliTS) as an evaluation policy using *Inverse Probability Weighting (IPW)* 6 | and logged bandit feedback generated by the Random policy (behavior policy). 7 | We see that only ten lines of code are sufficient to complete OPE from scratch. 8 | In this example, it is assumed that the `obd/random/all` directory exists under the present working directory. 9 | Please clone `the repository `_ in advance. 10 | 11 | .. 
code-block:: python 12 | 13 | # a case for implementing OPE of the BernoulliTS policy using log data generated by the Random policy 14 | >>> from obp.dataset import OpenBanditDataset 15 | >>> from obp.policy import BernoulliTS 16 | >>> from obp.ope import OffPolicyEvaluation, InverseProbabilityWeighting as IPW 17 | 18 | # (1) Data loading and preprocessing 19 | >>> dataset = OpenBanditDataset(behavior_policy='random', campaign='all') 20 | >>> bandit_feedback = dataset.obtain_batch_bandit_feedback() 21 | 22 | # (2) Off-Policy Learning 23 | >>> evaluation_policy = BernoulliTS( 24 | n_actions=dataset.n_actions, 25 | len_list=dataset.len_list, 26 | is_zozotown_prior=True, 27 | campaign="all", 28 | random_state=12345 29 | ) 30 | >>> action_dist = evaluation_policy.compute_batch_action_dist( 31 | n_sim=100000, n_rounds=bandit_feedback["n_rounds"] 32 | ) 33 | 34 | # (3) Off-Policy Evaluation 35 | >>> ope = OffPolicyEvaluation(bandit_feedback=bandit_feedback, ope_estimators=[IPW()]) 36 | >>> estimated_policy_value = ope.estimate_policy_values(action_dist=action_dist) 37 | 38 | # estimated performance of BernoulliTS relative to the ground-truth performance of Random 39 | >>> relative_policy_value_of_bernoulli_ts = estimated_policy_value['ipw'] / bandit_feedback['reward'].mean() 40 | >>> print(relative_policy_value_of_bernoulli_ts) 41 | 1.198126... 42 | 43 | A detailed introduction with the same example can be found at `quickstart `_. 44 | Below, we explain some important features in the example flow. 45 | 46 | 47 | Data loading and preprocessing 48 | ------------------------------------ 49 | 50 | We prepare an easy-to-use data loader for Open Bandit Dataset. 51 | 52 | .. code-block:: python 53 | 54 | # load and preprocess raw data in "ALL" campaign collected by the Random policy 55 | >>> dataset = OpenBanditDataset(behavior_policy='random', campaign='all') 56 | # obtain logged bandit feedback generated by the behavior policy 57 | >>> bandit_feedback = dataset.obtain_batch_bandit_feedback() 58 | 59 | >>> print(bandit_feedback.keys()) 60 | dict_keys(['n_rounds', 'n_actions', 'action', 'position', 'reward', 'pscore', 'context', 'action_context']) 61 | 62 | Users can implement their own feature engineering in the :class:`pre_process` method of :class:`obp.dataset.OpenBanditDataset` class. 63 | We show an example of implementing some new feature engineering processes in `custom_dataset.py `_. 64 | 65 | Moreover, by following the interface of :class:`obp.dataset.BaseBanditDataset` class, one can handle their own or future open datasets for bandit algorithms other than our OBD. 66 | 67 | Off-Policy Learning 68 | ------------------------------ 69 | 70 | After preparing a dataset, we now compute the action choice probability of BernoulliTS in the ZOZOTOWN production. 71 | Then, we can use it as the evaluation policy. 72 | 73 | .. code-block:: python 74 | 75 | # define evaluation policy (the Bernoulli TS policy here) 76 | # by activating the `is_zozotown_prior` argument of BernoulliTS, we can replicate BernoulliTS used in ZOZOTOWN production. 
77 | >>> evaluation_policy = BernoulliTS( 78 | n_actions=dataset.n_actions, 79 | len_list=dataset.len_list, 80 | is_zozotown_prior=True, # replicate the policy in the ZOZOTOWN production 81 | campaign="all", 82 | random_state=12345 83 | ) 84 | # compute the distribution over actions by the evaluation policy using Monte Carlo simulation 85 | # action_dist is an array of shape (n_rounds, n_actions, len_list) 86 | # representing the distribution over actions made by the evaluation policy 87 | >>> action_dist = evaluation_policy.compute_batch_action_dist( 88 | n_sim=100000, n_rounds=bandit_feedback["n_rounds"] 89 | ) 90 | 91 | The :class:`compute_batch_action_dist` method of :class:`BernoulliTS` computes the action choice probabilities based on the given hyperparameters of the beta distribution. 92 | :class:`action_dist` is an array representing the distribution over actions made by the evaluation policy. 93 | 94 | 95 | Off-Policy Evaluation 96 | ------------------------------ 97 | 98 | Our final step is **off-policy evaluation** (OPE), which attempts to estimate the performance of a decision making policy using log data generated by a different policy (here, the behavior policy). 99 | Our pipeline also provides an easy procedure for doing OPE as follows. 100 | 101 | .. code-block:: python 102 | 103 | # estimate the policy value of BernoulliTS based on the distribution over actions by that policy 104 | # it is possible to set multiple OPE estimators to the `ope_estimators` argument 105 | >>> ope = OffPolicyEvaluation(bandit_feedback=bandit_feedback, ope_estimators=[IPW()]) 106 | >>> estimated_policy_value = ope.estimate_policy_values(action_dist=action_dist) 107 | >>> print(estimated_policy_value) 108 | {'ipw': 0.004553...} # dictionary containing estimated policy values by each OPE estimator. 109 | 110 | # compare the estimated performance of BernoulliTS (evaluation policy) 111 | # with the ground-truth performance of Random (behavior policy) 112 | >>> relative_policy_value_of_bernoulli_ts = estimated_policy_value['ipw'] / bandit_feedback['reward'].mean() 113 | # our OPE procedure suggests that BernoulliTS improves Random by 19.81% 114 | >>> print(relative_policy_value_of_bernoulli_ts) 115 | 1.198126... 116 | 117 | Users can implement their own OPE estimator by following the interface of :class:`obp.ope.BaseOffPolicyEstimator` class. 118 | :class:`obp.ope.OffPolicyEvaluation` class summarizes and compares the policy values estimated by several off-policy estimators. 119 | A detailed usage of this class can be found at `quickstart `_. 120 | :class:`bandit_feedback['reward'].mean()` is the empirical mean of factual rewards (the on-policy estimate of the policy value) in the log and thus is the ground-truth performance of the behavior policy (the Random policy in this example). 121 | -------------------------------------------------------------------------------- /docs/references.rst: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | 4 | 5 | Papers 6 | ------ 7 | 8 | .. bibliography:: refs.bib 9 | :style: unsrt 10 | 11 | 12 | Projects 13 | ---------- 14 | 15 | This project is strongly inspired by **Open Graph Benchmark**, a collection of benchmark datasets, data loaders, and evaluators for graph machine learning: 16 | `[github] `_ `[project page] `_ `[paper] `_. 
17 | -------------------------------------------------------------------------------- /docs/related.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Related Resources 3 | =================== 4 | We summarize existing related resources for bandit algorithms and off-policy evaluation. 5 | 6 | 7 | Related Datasets 8 | -------------------- 9 | Our dataset is most closely related to those of :cite:`Lefortier2016` and :cite:`Li2010`. 10 | :cite:`Lefortier2016` introduces a large-scale logged bandit feedback dataset (Criteo data) from Criteo, a leading company in display advertising. 11 | The data contains context vectors of user impressions, advertisements (ads) as actions, and click indicators as rewards. 12 | It also provides the ex ante probability of each ad being selected by the behavior policy. 13 | Therefore, this data can be used to compare different *off-policy learning* methods, which aim to learn a new bandit policy using only log data generated by a behavior policy. 14 | In contrast, :cite:`Li2010` introduces a dataset (Yahoo! data) collected on a news recommendation interface of the Yahoo! Today Module. 15 | The data contains context vectors of user impressions, presented news as actions, and click indicators as rewards. 16 | It was collected by running a uniform random policy on the news recommendation platform, allowing researchers to evaluate their own bandit algorithms. 17 | 18 | However, the Criteo and Yahoo! data have limitations, which we overcome as follows: 19 | 20 | * The previous datasets do not provide the code (production implementation) of their behavior policy. Moreover, the data was collected by running only a single behavior policy. As a result, these data cannot be used for the evaluation and comparison of different OPE estimators. 21 | 22 | :math:`\rightarrow` In contrast, we provide the code of our behavior policies (i.e., Bernoulli TS and Random) in our pipeline, which allows researchers to re-run the same behavior policies on the log data. Our open data also contains logged bandit feedback data generated by *multiple* behavior policies. It enables the evaluation and comparison of different OPE estimators. This is the first large-scale bandit dataset that enables such evaluation of OPE with the ground-truth policy value of behavior policies. 23 | 24 | * The previous datasets do not provide a pipeline implementation to handle their data. Researchers have to re-implement the experimental environment by themselves before implementing their own methods. This may lead to inconsistent experimental conditions across different studies, potentially causing reproducibility issues. 25 | 26 | :math:`\rightarrow` We implement the Open Bandit Pipeline to simplify and standardize the experimental processing of bandit algorithms and OPE with our open data. This tool thus contributes to the reproducible and transparent use of our data. 27 | 28 | The following table summarizes key differences between our data and existing ones. 29 | 30 | .. image:: ./_static/images/related_data.png 31 | :scale: 40% 32 | :align: center 33 | 34 | Related Packages 35 | ------------------- 36 | There are several existing Python packages related to our Open Bandit Pipeline. 37 | For example, the *contextualbandits* package (https://github.com/david-cortes/contextualbandits) contains implementations of several contextual bandit algorithms :cite:`Cortes2018`. 
38 | It aims to provide an easy procedure for comparing bandit algorithms and reproducing research papers that do not provide easily available implementations. 39 | In addition, *RecoGym* (https://github.com/criteo-research/reco-gym) focuses on providing simulation bandit environments imitating the e-commerce recommendation setting :cite:`Rohde2018`. 40 | This package also implements an online bandit algorithm based on epsilon greedy and an off-policy learning method based on IPW. 41 | 42 | However, the following features differentiate our pipeline from the previous ones: 43 | 44 | * The previous packages focus on implementing and comparing online bandit algorithms or off-policy learning methods. However, they **cannot** be used to implement and compare off-policy evaluation methods. 45 | 46 | :math:`\rightarrow` Our package implements a wide variety of OPE estimators including advanced ones such as Switch Estimators :cite:`Wang2016`, More Robust Doubly Robust :cite:`Farajtabar2018`, and Doubly Robust with Shrinkage :cite:`Su2019`. Moreover, it is possible to compare the estimation accuracies of these estimators with our package in a fair manner. Our package also provides flexible interfaces for implementing new OPE estimators. Thus, researchers can easily compare their own estimators with other methods using our package. 47 | 48 | * The previous packages cannot handle real-world bandit datasets. 49 | 50 | :math:`\rightarrow` Our package comes with the Open Bandit Dataset and includes the **dataset module**. This enables the evaluation of bandit algorithms and off-policy estimators using our real-world data. This function contributes to realistic experiments on these topics. 51 | 52 | The following table summarizes key differences between our pipeline and existing ones. 53 | 54 | .. image:: ./_static/images/related_packages.png 55 | :scale: 40% 56 | :align: center 57 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Readthedocs requirements 2 | sphinx_rtd_theme 3 | sphinxcontrib-bibtex 4 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Open Bandit Pipeline Examples 2 | 3 | This page contains a list of examples written with Open Bandit Pipeline. 4 | 5 | - [`obd/`](./obd/): example implementations for evaluating standard off-policy estimators with the small sample Open Bandit Dataset. 6 | - [`synthetic/`](./synthetic/): example implementations for evaluating several off-policy estimators with synthetic bandit datasets. 7 | - [`multiclass/`](./multiclass/): example implementations for evaluating several off-policy estimators with multi-class classification datasets. 8 | - [`replay/`](./replay/): example implementations for evaluating the Replay Method with online bandit algorithms. 9 | - [`opl/`](./opl/): example implementations for comparing the performance of several off-policy learners with synthetic bandit datasets. 10 | - [`quickstart/`](./quickstart/): some quickstart notebooks to guide the usage of Open Bandit Pipeline. 
11 | -------------------------------------------------------------------------------- /examples/multiclass/README.md: -------------------------------------------------------------------------------- 1 | # Example Experiment with Multi-class Classification Data 2 | 3 | 4 | ## Description 5 | 6 | We use multi-class classification datasets to evaluate OPE estimators. Specifically, we evaluate the estimation performance of some well-known OPE estimators using the ground-truth policy value of an evaluation policy calculable with multi-class classification data. 7 | 8 | ## Evaluating Off-Policy Estimators 9 | 10 | In the following, we evaluate the estimation performance of 11 | 12 | - Direct Method (DM) 13 | - Inverse Probability Weighting (IPW) 14 | - Self-Normalized Inverse Probability Weighting (SNIPW) 15 | - Doubly Robust (DR) 16 | - Self-Normalized Doubly Robust (SNDR) 17 | - Switch Doubly Robust (Switch-DR) 18 | - Doubly Robust with Optimistic Shrinkage (DRos) 19 | 20 | For Switch-DR and DRos, we tune the built-in hyperparameters using SLOPE (Su et al., 2020; Tucker et al., 2021), a data-driven hyperparameter tuning method for OPE estimators. 21 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators. 22 | 23 | ### Files 24 | - [`./evaluate_off_policy_estimators.py`](./evaluate_off_policy_estimators.py) implements the evaluation of OPE estimators using multi-class classification data. 25 | - [`./conf/hyperparams.yaml`](./conf/hyperparams.yaml) defines hyperparameters of some ML methods used to define regression model. 26 | 27 | ### Scripts 28 | 29 | ```bash 30 | # run evaluation of OPE estimators with multi-class classification data 31 | python evaluate_off_policy_estimators.py\ 32 | --n_runs $n_runs\ 33 | --dataset_name $dataset_name \ 34 | --eval_size $eval_size \ 35 | --base_model_for_behavior_policy $base_model_for_behavior_policy\ 36 | --alpha_b $alpha_b \ 37 | --base_model_for_evaluation_policy $base_model_for_evaluation_policy\ 38 | --alpha_e $alpha_e \ 39 | --base_model_for_reg_model $base_model_for_reg_model\ 40 | --n_jobs $n_jobs\ 41 | --random_state $random_state 42 | ``` 43 | - `$n_runs` specifies the number of simulation runs in the experiment to estimate standard deviations of the performance of OPE estimators. 44 | - `$dataset_name` specifies the name of the multi-class classification dataset and should be one of "breast_cancer", "digits", "iris", or "wine". 45 | - `$eval_size` specifies the proportion of the dataset to include in the evaluation split. 46 | - `$base_model_for_behavior_policy` specifies the base ML model for defining behavior policy and should be one of "logistic_regression", "random_forest", or "lightgbm". 47 | - `$alpha_b`: specifies the ratio of a uniform random policy when constructing a behavior policy. 48 | - `$base_model_for_evaluation_policy` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm". 49 | - `$alpha_e`: specifies the ratio of a uniform random policy when constructing an evaluation policy. 50 | - `$base_model_for_reg_model` specifies the base ML model for defining regression model and should be one of "logistic_regression", "random_forest", or "lightgbm". 51 | - `$n_jobs` is the maximum number of concurrently running jobs. 52 | 53 | For example, the following command compares the estimation performance (relative estimation error; relative-ee) of the OPE estimators using the digits dataset. 
54 | 55 | ```bash 56 | python evaluate_off_policy_estimators.py\ 57 | --n_runs 30\ 58 | --dataset_name digits\ 59 | --eval_size 0.7\ 60 | --base_model_for_behavior_policy logistic_regression\ 61 | --alpha_b 0.4\ 62 | --base_model_for_evaluation_policy random_forest\ 63 | --alpha_e 0.9\ 64 | --base_model_for_reg_model lightgbm\ 65 | --n_jobs -1\ 66 | --random_state 12345 67 | 68 | # relative-ee of OPE estimators and their standard deviations (lower is better). 69 | # ============================================= 70 | # random_state=12345 71 | # --------------------------------------------- 72 | # mean std 73 | # dm 0.436541 0.017629 74 | # ipw 0.030288 0.024506 75 | # snipw 0.022764 0.017917 76 | # dr 0.016156 0.012679 77 | # sndr 0.022082 0.016865 78 | # switch-dr 0.034657 0.018575 79 | # dr-os 0.015868 0.012537 80 | # ============================================= 81 | ``` 82 | 83 | The above result can change with different situations. You can try the evaluation of OPE with other experimental settings easily. 84 | 85 | 86 | ## References 87 | 88 | - Yi Su, Pavithra Srinath, Akshay Krishnamurthy. [Adaptive Estimator Selection for Off-Policy Evaluation](https://arxiv.org/abs/2002.07729), ICML2020. 89 | - Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, Miroslav Dudík. [Doubly Robust Off-policy Evaluation with Shrinkage](https://arxiv.org/abs/1907.09623), ICML2020. 90 | - George Tucker and Jonathan Lee. [Improved Estimator Selection for Off-Policy Evaluation](https://lyang36.github.io/icml2021_rltheory/camera_ready/79.pdf), Workshop on Reinforcement Learning 91 | Theory at ICML2021. 92 | - Yu-Xiang Wang, Alekh Agarwal, Miroslav Dudik. [Optimal and Adaptive Off-policy Evaluation in Contextual Bandits](https://arxiv.org/abs/1612.01205), ICML2017. 93 | - Miroslav Dudik, John Langford, Lihong Li. [Doubly Robust Policy Evaluation and Learning](https://arxiv.org/abs/1103.4601). ICML2011. 94 | - Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita. [Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation](https://arxiv.org/abs/2008.07146). NeurIPS2021 Track on Datasets and Benchmarks. 95 | 96 | -------------------------------------------------------------------------------- /examples/multiclass/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 30 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 100 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 30 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /examples/obd/README.md: -------------------------------------------------------------------------------- 1 | # Example Experiment with Open Bandit Dataset 2 | 3 | ## Description 4 | 5 | We use Open Bandit Dataset to implement the evaluation of OPE. Specifically, we evaluate the estimation performance of some well-known OPE estimators using the on-policy policy value of an evaluation policy, which is calculable with the dataset. 
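
To make the target of the evaluation concrete: the on-policy policy value of an evaluation policy is simply the mean observed reward in the log collected by that policy itself. The sketch below illustrates this calculation; it is not part of the example script, and it assumes the small-sized Open Bandit Dataset bundled with the repository/package is accessible (the loader's `data_path` argument can be used if the data lives elsewhere).

```python
from obp.dataset import OpenBanditDataset

# log data collected by Bernoulli TS (the evaluation policy in this example)
bandit_feedback_bts = OpenBanditDataset(
    behavior_policy="bts", campaign="all"
).obtain_batch_bandit_feedback()

# on-policy ("ground-truth") policy value of Bernoulli TS:
# the empirical mean of the rewards observed under that policy
ground_truth = bandit_feedback_bts["reward"].mean()
print(ground_truth)
```

The estimators listed below only see the log collected by the *behavior* policy; their estimates are then compared against this on-policy value.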
6 | 7 | ## Evaluating Off-Policy Estimators 8 | 9 | In the following, we evaluate the estimation performance of 10 | 11 | - Direct Method (DM) 12 | - Inverse Probability Weighting (IPW) 13 | - Self-Normalized Inverse Probability Weighting (SNIPW) 14 | - Doubly Robust (DR) 15 | - Self-Normalized Doubly Robust (SNDR) 16 | - Switch Doubly Robust (Switch-DR) 17 | - Doubly Robust with Optimistic Shrinkage (DRos) 18 | 19 | For Switch-DR and DRos, we tune the built-in hyperparameters using SLOPE, a data-driven hyperparameter tuning method for OPE estimators. 20 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators. 21 | 22 | ### Files 23 | - [`./evaluate_off_policy_estimators.py`](./evaluate_off_policy_estimators.py) implements the evaluation of OPE estimators using Open Bandit Dataset. 24 | - [`./conf/hyperparams.yaml`](./conf/hyperparams.yaml) defines hyperparameters of some ML models used as the regression model in model dependent estimators (such as DM and DR). 25 | 26 | ### Scripts 27 | 28 | ```bash 29 | # run evaluation of OPE estimators with (small size) Open Bandit Dataset 30 | python evaluate_off_policy_estimators.py\ 31 | --n_runs $n_runs\ 32 | --base_model $base_model\ 33 | --evaluation_policy $evaluation_policy\ 34 | --behavior_policy $behavior_policy\ 35 | --campaign $campaign\ 36 | --n_sim_to_compute_action_dist $n_sim_to_compute_action_dist\ 37 | --n_jobs $n_jobs\ 38 | --random_state $random_state 39 | ``` 40 | - `$n_runs` specifies the number of bootstrap samples used to estimate the means and standard deviations of the performance of OPE estimators (i.e., relative estimation error). 41 | - `$base_model` specifies the base ML model for estimating the reward function, and should be one of `logistic_regression`, `random_forest`, or `lightgbm`. 42 | - `$evaluation_policy` and `$behavior_policy` specify the evaluation and behavior policies, respectively. 43 | They should be either 'bts' or 'random'. 44 | - `$campaign` specifies the campaign and should be one of 'all', 'men', or 'women'. 45 | - `$n_sim_to_compute_action_dist` is the number of Monte Carlo simulations used to compute the action distribution of a given evaluation policy. 46 | - `$n_jobs` is the maximum number of concurrently running jobs. 47 | 48 | For example, the following command compares the estimation performance of the OPE estimators listed above, using Bernoulli TS as the evaluation policy and Random as the behavior policy in the "All" campaign. 49 | 50 | ```bash 51 | python evaluate_off_policy_estimators.py\ 52 | --n_runs 30\ 53 | --base_model logistic_regression\ 54 | --evaluation_policy bts\ 55 | --behavior_policy random\ 56 | --campaign all\ 57 | --n_jobs -1 58 | 59 | # relative estimation errors of OPE estimators and their standard deviations. 60 | # ============================== 61 | # random_state=12345 62 | # ------------------------------ 63 | # mean std 64 | # dm 0.156876 0.109898 65 | # ipw 0.311082 0.311170 66 | # snipw 0.311795 0.334736 67 | # dr 0.292464 0.315485 68 | # sndr 0.302407 0.328434 69 | # switch-dr 0.258410 0.160598 70 | # dr-os 0.159520 0.109660 71 | # ============================== 72 | ``` 73 | 74 | Please refer to [this page](https://zr-obp.readthedocs.io/en/latest/evaluation_ope.html) for the evaluation of OPE protocol using our real-world data. Please visit [synthetic](../synthetic/) to try the evaluation of OPE estimators with synthetic bandit data. 
Moreover, in [benchmark/ope](https://github.com/st-tech/zr-obp/tree/master/benchmark/ope), we performed the benchmark experiments on several OPE estimators using the full size Open Bandit Dataset. 75 | 76 | 77 | 78 | ## References 79 | 80 | - Yi Su, Pavithra Srinath, Akshay Krishnamurthy. [Adaptive Estimator Selection for Off-Policy Evaluation](https://arxiv.org/abs/2002.07729), ICML2020. 81 | - Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, Miroslav Dudík. [Doubly Robust Off-policy Evaluation with Shrinkage](https://arxiv.org/abs/1907.09623), ICML2020. 82 | - George Tucker and Jonathan Lee. [Improved Estimator Selection for Off-Policy Evaluation](https://lyang36.github.io/icml2021_rltheory/camera_ready/79.pdf), Workshop on Reinforcement Learning 83 | Theory at ICML2021. 84 | - Yu-Xiang Wang, Alekh Agarwal, Miroslav Dudik. [Optimal and Adaptive Off-policy Evaluation in Contextual Bandits](https://arxiv.org/abs/1612.01205), ICML2017. 85 | - Miroslav Dudik, John Langford, Lihong Li. [Doubly Robust Policy Evaluation and Learning](https://arxiv.org/abs/1103.4601). ICML2011. 86 | - Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita. [Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation](https://arxiv.org/abs/2008.07146). NeurIPS2021 Track on Datasets and Benchmarks. 87 | 88 | -------------------------------------------------------------------------------- /examples/obd/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 30 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 100 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 30 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /examples/opl/README.md: -------------------------------------------------------------------------------- 1 | # Example with Off-Policy Policy Learners 2 | 3 | 4 | ## Description 5 | 6 | We use synthetic bandit data to evaluate some off-policy learners using their ground-truth policy value calculable with synthetic data. 7 | 8 | ## Evaluating Off-Policy Learners 9 | 10 | In the following, we evaluate the performances of 11 | 12 | - Uniform Random Policy (`Random`) 13 | - Inverse Probability Weighting Policy Learner (`IPWLearner`) 14 | - Policy Learner using Neural Networks (`NNPolicyLearner`) 15 | 16 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/_autosummary/obp.policy.offline.html) for the details about `IPWLearner` and `NNPolicyLearner`. 17 | 18 | `NNPolicyLearner` can use the following OPE estimators as the objective function: 19 | - Direct Method (DM) 20 | - Inverse Probability Weighting (IPW) 21 | - Doubly Robust (DR) 22 | 23 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators. 24 | 25 | ### Files 26 | - [`./evaluate_off_policy_learners.py`](./evaluate_off_policy_learners.py) implements the evaluation of off-policy learners using synthetic bandit data. 27 | - [`./conf/hyperparams.yaml`](./conf/hyperparams.yaml) defines hyperparameters of some ML methods used to define regression model and IPWLearner. 
28 | 29 | ### Scripts 30 | 31 | ```bash 32 | # run evaluation of off-policy learners with synthetic bandit data 33 | python evaluate_off_policy_learners.py\ 34 | --n_rounds $n_rounds\ 35 | --n_actions $n_actions\ 36 | --dim_context $dim_context\ 37 | --beta $beta\ 38 | --base_model_for_ipw_learner $base_model_for_ipw_learner\ 39 | --base_model_for_reg_model $base_model_for_reg_model\ 40 | --off_policy_objective $off_policy_objective\ 41 | --n_hidden $n_hidden\ 42 | --n_layers $n_layers\ 43 | --activation $activation\ 44 | --solver $solver\ 45 | --batch_size $batch_size\ 46 | --early_stopping\ 47 | --random_state $random_state 48 | ``` 49 | - `$n_rounds` and `$n_actions` specify the sample size and the number of actions of the synthetic bandit data, respectively. 50 | - `$dim_context` specifies the dimension of context vectors. 51 | - `$beta` specifies the inverse temperature parameter to control the behavior policy. 52 | - `$base_model_for_ipw_learner` specifies the base ML model used in IPWLearner and should be one of "logistic_regression", "random_forest", or "lightgbm". 53 | - `$off_policy_objective` specifies the OPE estimator used as the objective of NNPolicyLearner and should be one of "dm", "ipw", or "dr". 54 | - `$n_hidden` specifies the size of hidden layers in NNPolicyLearner. 55 | - `$n_layers` specifies the number of hidden layers in NNPolicyLearner. 56 | - `$activation` specifies the activation function for NNPolicyLearner and should be one of "identity", "tanh", "logistic", or "relu". 57 | - `$solver` specifies the optimizer for NNPolicyLearner and should be one of "adagrad", "sgd", or "adam". 58 | - `$batch_size` specifies the batch size for NNPolicyLearner. 59 | - `$early_stopping` enables early stopping of training of NNPolicyLearner. 60 | 61 | For example, the following command compares the performance of the off-policy learners using synthetic bandit data with 10,000 rounds, 10 actions, and five-dimensional context vectors. 62 | 63 | ```bash 64 | python evaluate_off_policy_learners.py\ 65 | --n_rounds 10000\ 66 | --n_actions 10\ 67 | --dim_context 5\ 68 | --base_model_for_ipw_learner logistic_regression\ 69 | --off_policy_objective ipw\ 70 | --n_hidden 100\ 71 | --n_layers 1\ 72 | --activation relu\ 73 | --solver adam\ 74 | --batch_size 200\ 75 | --early_stopping 76 | 77 | # policy values of off-policy learners (higher means better) 78 | # ============================================= 79 | # random_state=12345 80 | # --------------------------------------------- 81 | # policy value 82 | # random_policy 0.499925 83 | # ipw_learner 0.782430 84 | # nn_policy_learner (with ipw) 0.735947 85 | # ============================================= 86 | ``` 87 | 88 | The above result can change with different situations. You can try the evaluation with other experimental settings easily. 
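
As a complement to the command-line script, the following is a rough sketch of how the three policies compared above might be constructed and trained directly with Open Bandit Pipeline. It is only an illustration under assumed, deliberately small hyperparameters, not a reproduction of `evaluate_off_policy_learners.py`, and the exact arguments accepted by `NNPolicyLearner` may differ slightly across obp versions.

```python
from sklearn.linear_model import LogisticRegression

from obp.dataset import SyntheticBanditDataset, logistic_reward_function
from obp.policy import IPWLearner, NNPolicyLearner, Random

# synthetic logged bandit data
dataset = SyntheticBanditDataset(
    n_actions=10,
    dim_context=5,
    reward_function=logistic_reward_function,
    random_state=12345,
)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=1000)

# (1) uniform random baseline
random_policy = Random(n_actions=dataset.n_actions, random_state=12345)

# (2) IPWLearner with a logistic regression base classifier
ipw_learner = IPWLearner(
    n_actions=dataset.n_actions,
    base_classifier=LogisticRegression(C=100, random_state=12345),
)
ipw_learner.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)

# (3) NNPolicyLearner trained with the IPW objective
nn_policy_learner = NNPolicyLearner(
    n_actions=dataset.n_actions,
    dim_context=5,
    off_policy_objective="ipw",
    random_state=12345,
)
nn_policy_learner.fit(
    context=bandit_feedback["context"],
    action=bandit_feedback["action"],
    reward=bandit_feedback["reward"],
    pscore=bandit_feedback["pscore"],
)

# action choices of the learned policies on the logged contexts
action_dist_ipw = ipw_learner.predict(context=bandit_feedback["context"])
action_dist_nn = nn_policy_learner.predict_proba(context=bandit_feedback["context"])
```

The policy values reported in the table above are then obtained by evaluating such action distributions against the ground truth available in the synthetic setting.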
89 | 90 | -------------------------------------------------------------------------------- /examples/opl/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 30 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 100 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 30 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /examples/quickstart/README.md: -------------------------------------------------------------------------------- 1 | # Open Bandit Pipeline Quickstart Notebooks 2 | 3 | This page contains a list of quickstart notebooks written with Open Bandit Pipeline. 4 | 5 | - [`obd.ipynb`](./obd.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/obd.ipynb): a quickstart guide of using Open Bandit Dataset and Pipeline to conduct some OPE experiments. 6 | - [`synthetic.ipynb`](./synthetic.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/synthetic.ipynb): a quickstart guide to implement the standard off-policy learning, OPE, and the evaluation of OPE on synthetic bandit data with Open Bandit Pipeline. 7 | - [`multiclass.ipynb`](./multiclass.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/multiclass.ipynb): a quickstart guide to handle multi-class classification data as logged bandit data for the standard off-policy learning, OPE, and the evaluation of OPE with Open Bandit Pipeline. 8 | - [`online.ipynb`](./replay.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/online.ipynb): a quickstart guide to implement OPE and the evaluation of OPE for online bandit algorithms with Open Bandit Pipeline. 9 | - [`opl.ipynb`](./opl.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/opl.ipynb): a quickstart guide to implement off-policy learners and the evaluation of off-policy learners with Open Bandit Pipeline. 10 | - [`synthetic_slate.ipynb`](./synthetic_slate.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/st-tech/zr-obp/blob/master/examples/quickstart/synthetic_slate.ipynb): a quickstart guide to implement OPE and the evaluation of OPE for the slate recommendation setting with Open Bandit Pipeline. 11 | -------------------------------------------------------------------------------- /examples/replay/README.md: -------------------------------------------------------------------------------- 1 | # Replay Example with Online Bandit Algorithms 2 | 3 | 4 | ## Description 5 | 6 | We use synthetic bandit datasets to evaluate OPE of online bandit algorithms. 7 | Specifically, we evaluate the estimation performance of some well-known OPE estimators using the ground-truth policy value of an evaluation policy calculable with synthetic data. 
8 | 9 | 10 | ## Evaluating Off-Policy Estimators 11 | 12 | In the following, we evaluate the estimation performance of the Replay Method (RM). 13 | RM uses a subset of the logged bandit feedback data where actions selected by the behavior policy are the same as those of the evaluation policy. 14 | Theoretically, RM is unbiased when the behavior policy is uniformly random and the evaluation policy is fixed. 15 | However, empirically, RM works well when evaluation policies are learning algorithms. 16 | Please refer to https://arxiv.org/abs/1003.5956 for the details of RM. 17 | 18 | 19 | ### Files 20 | - [`./evaluate_off_policy_estimators.py`](./evaluate_off_policy_estimators.py) implements the evaluation of OPE estimators by RM using synthetic bandit data. 21 | 22 | ### Scripts 23 | 24 | ```bash 25 | # run evaluation of OPE estimators with synthetic bandit data 26 | python evaluate_off_policy_estimators.py\ 27 | --n_runs $n_runs\ 28 | --n_rounds $n_rounds\ 29 | --n_actions $n_actions\ 30 | --n_sim $n_sim\ 31 | --dim_context $dim_context\ 32 | --n_jobs $n_jobs\ 33 | --random_state $random_state 34 | ``` 35 | - `$n_runs` specifies the number of simulation runs in the experiment to estimate standard deviations of the performance of OPE estimators. 36 | - `$n_rounds` and `$n_actions` specify the sample size and the number of actions of the synthetic bandit data. 37 | - `$dim_context` specifies the dimension of context vectors. 38 | - `$n_sim` specifies the number of Monte Carlo simulations used to compute the ground-truth policy value. 39 | - `$evaluation_policy_name` specifies the evaluation policy and should be one of "bernoulli_ts", "epsilon_greedy", "lin_epsilon_greedy", "lin_ts", "lin_ucb", "logistic_epsilon_greedy", "logistic_ts", or "logistic_ucb". 40 | - `$n_jobs` is the maximum number of concurrently running jobs. 41 | 42 | For example, the following command compares the estimation performance (relative estimation error; relative-ee) of RM using synthetic bandit data with 1,000 rounds, 30 actions, and five-dimensional context vectors. 43 | 44 | ```bash 45 | python evaluate_off_policy_estimators.py\ 46 | --n_runs 20\ 47 | --n_rounds 1000\ 48 | --n_actions 30\ 49 | --dim_context 5\ 50 | --evaluation_policy_name bernoulli_ts\ 51 | --n_sim 3\ 52 | --n_jobs -1\ 53 | --random_state 12345 54 | 55 | # relative-ee of OPE estimators and their standard deviations (lower means more accurate). 56 | # ============================================= 57 | # random_state=12345 58 | # --------------------------------------------- 59 | # mean std 60 | # rm 0.097064 0.091453 61 | # ============================================= 62 | ``` 63 | 64 | The above result can change with different situations. 65 | You can try the evaluation of OPE with other experimental settings easily. 
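
For intuition, the matching idea behind RM described above can be sketched in a few lines of NumPy: keep only the rounds in which the logged action coincides with the action the evaluation policy would have chosen, and average their rewards. This toy snippet uses made-up data and a deterministic evaluation policy purely to illustrate the idea; the actual estimator is `obp.ope.ReplayMethod`, which is what the example script uses.

```python
import numpy as np

def replay_estimate(action, reward, action_by_eval_policy):
    """Mean reward over the rounds where the logged action matches
    the (deterministic) evaluation policy's action."""
    match = action == action_by_eval_policy
    if not match.any():
        return 0.0  # no matched rounds: the estimate is not informative
    return float(reward[match].mean())

rng = np.random.default_rng(12345)
n_rounds, n_actions = 10, 3
action = rng.integers(n_actions, size=n_rounds)                 # logged actions (uniformly random behavior policy)
reward = rng.binomial(1, 0.3, size=n_rounds)                    # logged binary rewards
action_by_eval_policy = rng.integers(n_actions, size=n_rounds)  # actions the evaluation policy would choose

print(replay_estimate(action, reward, action_by_eval_policy))
```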
66 | -------------------------------------------------------------------------------- /examples/replay/evaluate_off_policy_estimators.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | from joblib import delayed 5 | from joblib import Parallel 6 | import numpy as np 7 | from pandas import DataFrame 8 | 9 | from obp.dataset import logistic_reward_function 10 | from obp.dataset import SyntheticBanditDataset 11 | from obp.ope import OffPolicyEvaluation 12 | from obp.ope import ReplayMethod 13 | from obp.policy import BernoulliTS 14 | from obp.policy import EpsilonGreedy 15 | from obp.policy import LinEpsilonGreedy 16 | from obp.policy import LinTS 17 | from obp.policy import LinUCB 18 | from obp.policy import LogisticEpsilonGreedy 19 | from obp.policy import LogisticTS 20 | from obp.policy import LogisticUCB 21 | from obp.simulator import calc_ground_truth_policy_value 22 | from obp.utils import run_bandit_replay 23 | 24 | ope_estimators = [ReplayMethod()] 25 | 26 | if __name__ == "__main__": 27 | parser = argparse.ArgumentParser( 28 | description="evaluate off-policy estimators with replay bandit algorithms and synthetic bandit data." 29 | ) 30 | parser.add_argument( 31 | "--n_runs", type=int, default=1, help="number of simulations in the experiment." 32 | ) 33 | parser.add_argument( 34 | "--n_rounds", 35 | type=int, 36 | default=10000, 37 | help="sample size of logged bandit data.", 38 | ) 39 | parser.add_argument( 40 | "--n_actions", 41 | type=int, 42 | default=10, 43 | help="number of actions.", 44 | ) 45 | parser.add_argument( 46 | "--dim_context", 47 | type=int, 48 | default=5, 49 | help="dimensions of context vectors.", 50 | ) 51 | parser.add_argument( 52 | "--n_sim", 53 | type=int, 54 | default=1, 55 | help="number of simulations to calculate ground truth policy values", 56 | ) 57 | parser.add_argument( 58 | "--evaluation_policy_name", 59 | type=str, 60 | choices=[ 61 | "bernoulli_ts", 62 | "epsilon_greedy", 63 | "lin_epsilon_greedy", 64 | "lin_ts", 65 | "lin_ucb", 66 | "logistic_epsilon_greedy", 67 | "logistic_ts", 68 | "logistic_ucb", 69 | ], 70 | required=True, 71 | help="the name of evaluation policy, bernoulli_ts, epsilon_greedy, lin_epsilon_greedy, lin_ts, lin_ucb, logistic_epsilon_greedy, logistic_ts, or logistic_ucb", 72 | ) 73 | parser.add_argument( 74 | "--n_jobs", 75 | type=int, 76 | default=1, 77 | help="the maximum number of concurrently running jobs.", 78 | ) 79 | parser.add_argument("--random_state", type=int, default=12345) 80 | args = parser.parse_args() 81 | print(args) 82 | 83 | # configurations 84 | n_runs = args.n_runs 85 | n_rounds = args.n_rounds 86 | n_actions = args.n_actions 87 | dim_context = args.dim_context 88 | n_sim = args.n_sim 89 | evaluation_policy_name = args.evaluation_policy_name 90 | n_jobs = args.n_jobs 91 | random_state = args.random_state 92 | np.random.seed(random_state) 93 | 94 | # define evaluation policy 95 | evaluation_policy_dict = dict( 96 | bernoulli_ts=BernoulliTS(n_actions=n_actions, random_state=random_state), 97 | epsilon_greedy=EpsilonGreedy( 98 | n_actions=n_actions, epsilon=0.1, random_state=random_state 99 | ), 100 | lin_epsilon_greedy=LinEpsilonGreedy( 101 | dim=dim_context, n_actions=n_actions, epsilon=0.1, random_state=random_state 102 | ), 103 | lin_ts=LinTS(dim=dim_context, n_actions=n_actions, random_state=random_state), 104 | lin_ucb=LinUCB(dim=dim_context, n_actions=n_actions, random_state=random_state), 105 | 
logistic_epsilon_greedy=LogisticEpsilonGreedy( 106 | dim=dim_context, n_actions=n_actions, epsilon=0.1, random_state=random_state 107 | ), 108 | logistic_ts=LogisticTS( 109 | dim=dim_context, n_actions=n_actions, random_state=random_state 110 | ), 111 | logistic_ucb=LogisticUCB( 112 | dim=dim_context, n_actions=n_actions, random_state=random_state 113 | ), 114 | ) 115 | evaluation_policy = evaluation_policy_dict[evaluation_policy_name] 116 | 117 | def process(i: int): 118 | # synthetic data generator with uniformly random policy 119 | dataset = SyntheticBanditDataset( 120 | n_actions=n_actions, 121 | dim_context=dim_context, 122 | reward_function=logistic_reward_function, 123 | behavior_policy_function=None, # uniformly random 124 | random_state=i, 125 | ) 126 | # sample new data of synthetic logged bandit feedback 127 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 128 | # simulate the evaluation policy 129 | action_dist = run_bandit_replay( 130 | bandit_feedback=bandit_feedback, policy=evaluation_policy 131 | ) 132 | # estimate the ground-truth policy values of the evaluation policy 133 | # by Monte-Carlo Simulation using p(r|x,a), the reward distribution 134 | ground_truth_policy_value = calc_ground_truth_policy_value( 135 | bandit_feedback=bandit_feedback, 136 | reward_sampler=dataset.sample_reward, # p(r|x,a) 137 | policy=evaluation_policy, 138 | n_sim=n_sim, # the number of simulations 139 | ) 140 | # evaluate estimators' performances using relative estimation error (relative-ee) 141 | ope = OffPolicyEvaluation( 142 | bandit_feedback=bandit_feedback, 143 | ope_estimators=ope_estimators, 144 | ) 145 | metric_i = ope.evaluate_performance_of_estimators( 146 | ground_truth_policy_value=ground_truth_policy_value, 147 | action_dist=action_dist, 148 | ) 149 | 150 | return metric_i 151 | 152 | processed = Parallel( 153 | n_jobs=n_jobs, 154 | verbose=50, 155 | )([delayed(process)(i) for i in np.arange(n_runs)]) 156 | metric_dict = {est.estimator_name: dict() for est in ope_estimators} 157 | for i, metric_i in enumerate(processed): 158 | for ( 159 | estimator_name, 160 | relative_ee_, 161 | ) in metric_i.items(): 162 | metric_dict[estimator_name][i] = relative_ee_ 163 | se_df = DataFrame(metric_dict).describe().T.round(6) 164 | 165 | print("=" * 45) 166 | print(f"random_state={random_state}") 167 | print("-" * 45) 168 | print(se_df[["mean", "std"]]) 169 | print("=" * 45) 170 | 171 | # save results of the evaluation of off-policy estimators in './logs' directory. 172 | log_path = Path("./logs") 173 | log_path.mkdir(exist_ok=True, parents=True) 174 | se_df.to_csv(log_path / "relative_ee_of_ope_estimators.csv") 175 | -------------------------------------------------------------------------------- /examples/synthetic/README.md: -------------------------------------------------------------------------------- 1 | # Example Experiment with Synthetic Bandit Data 2 | 3 | ## Description 4 | 5 | We use synthetic bandit datasets to evaluate OPE estimators. Specifically, we evaluate the estimation performance of well-known estimators using the ground-truth policy value of an evaluation policy calculable with synthetic data. 
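
Because the reward function is known in the synthetic setting, the ground-truth policy value of any evaluation policy can be computed exactly and then compared with each estimator's output. The sketch below shows where that ground truth comes from, using a hand-made uniformly random evaluation policy for brevity (the example script instead defines the evaluation policy with an ML model and evaluates all of the estimators listed below at once); argument values here are illustrative only.

```python
import numpy as np

from obp.dataset import SyntheticBanditDataset, logistic_reward_function
from obp.ope import InverseProbabilityWeighting as IPW, OffPolicyEvaluation

dataset = SyntheticBanditDataset(
    n_actions=10,
    dim_context=5,
    beta=-3,  # inverse temperature of the behavior policy
    reward_function=logistic_reward_function,
    random_state=12345,
)
bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)

# a uniformly random evaluation policy, shape (n_rounds, n_actions, len_list=1)
action_dist = np.full(
    (bandit_feedback["n_rounds"], dataset.n_actions, 1), 1.0 / dataset.n_actions
)

# ground-truth value of the evaluation policy, from the known expected rewards q(x, a)
ground_truth = dataset.calc_ground_truth_policy_value(
    expected_reward=bandit_feedback["expected_reward"], action_dist=action_dist
)

# OPE estimate of the same quantity, computed from the logged data only
ope = OffPolicyEvaluation(bandit_feedback=bandit_feedback, ope_estimators=[IPW()])
estimated = ope.estimate_policy_values(action_dist=action_dist)["ipw"]

# relative estimation error (relative-ee), the metric reported in this example
relative_ee = abs(estimated - ground_truth) / ground_truth
print(relative_ee)
```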
6 | 7 | ## Evaluating Off-Policy Estimators 8 | 9 | In the following, we evaluate the estimation performance of 10 | 11 | - Direct Method (DM) 12 | - Inverse Probability Weighting (IPW) 13 | - Self-Normalized Inverse Probability Weighting (SNIPW) 14 | - Doubly Robust (DR) 15 | - Self-Normalized Doubly Robust (SNDR) 16 | - Switch Doubly Robust (Switch-DR) 17 | - Doubly Robust with Optimistic Shrinkage (DRos) 18 | 19 | For Switch-DR and DRos, we tune the built-in hyperparameters using SLOPE, a data-driven hyperparameter tuning method for OPE estimators. 20 | See [our documentation](https://zr-obp.readthedocs.io/en/latest/estimators.html) for the details about these estimators. 21 | 22 | ### Files 23 | - [`./evaluate_off_policy_estimators.py`](./evaluate_off_policy_estimators.py) implements the evaluation of OPE estimators using synthetic bandit data. 24 | - [`./conf/hyperparams.yaml`](./conf/hyperparams.yaml) defines hyperparameters of some ML methods used to define regression model and IPWLearner. 25 | 26 | ### Scripts 27 | 28 | ```bash 29 | # run evaluation of OPE estimators with synthetic bandit data 30 | python evaluate_off_policy_estimators.py\ 31 | --n_runs $n_runs\ 32 | --n_rounds $n_rounds\ 33 | --n_actions $n_actions\ 34 | --dim_context $dim_context\ 35 | --beta $beta\ 36 | --base_model_for_evaluation_policy $base_model_for_evaluation_policy\ 37 | --base_model_for_reg_model $base_model_for_reg_model\ 38 | --n_jobs $n_jobs\ 39 | --random_state $random_state 40 | ``` 41 | - `$n_runs` specifies the number of simulation runs in the experiment to estimate standard deviations of the performance of OPE estimators. 42 | - `$n_rounds` and `$n_actions` specify the sample size and the number of actions of the synthetic bandit data, respectively. 43 | - `$dim_context` specifies the dimension of context vectors. 44 | - `$beta` specifies the inverse temperature parameter to control the behavior policy. 45 | - `$base_model_for_evaluation_policy` specifies the base ML model for defining evaluation policy and should be one of "logistic_regression", "random_forest", or "lightgbm". 46 | - `$base_model_for_reg_model` specifies the base ML model for defining regression model and should be one of "logistic_regression", "random_forest", or "lightgbm". 47 | - `$n_jobs` is the maximum number of concurrently running jobs. 48 | 49 | For example, the following command compares the estimation performance (relative estimation error; relative-ee) of the OPE estimators using synthetic bandit data with 10,000 samples, 30 actions, five dimensional context vectors. 50 | 51 | ```bash 52 | python evaluate_off_policy_estimators.py\ 53 | --n_runs 20\ 54 | --n_rounds 10000\ 55 | --n_actions 30\ 56 | --dim_context 5\ 57 | --beta -3\ 58 | --base_model_for_evaluation_policy logistic_regression\ 59 | --base_model_for_reg_model logistic_regression\ 60 | --n_jobs -1\ 61 | --random_state 12345 62 | 63 | # relative-ee of OPE estimators and their standard deviations (lower means accurate). 64 | # ============================================= 65 | # random_state=12345 66 | # --------------------------------------------- 67 | # mean std 68 | # dm 0.074390 0.024525 69 | # ipw 0.009481 0.006899 70 | # snipw 0.006665 0.004541 71 | # dr 0.006175 0.004245 72 | # sndr 0.006118 0.003997 73 | # switch-dr 0.006175 0.004245 74 | # dr-os 0.021951 0.013337 75 | # ============================================= 76 | ``` 77 | 78 | The above result can change with different situations. 
You can try the evaluation of OPE with other experimental settings easily. 79 | 80 | ## References 81 | 82 | - Yi Su, Pavithra Srinath, Akshay Krishnamurthy. [Adaptive Estimator Selection for Off-Policy Evaluation](https://arxiv.org/abs/2002.07729), ICML2020. 83 | - Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, Miroslav Dudík. [Doubly Robust Off-policy Evaluation with Shrinkage](https://arxiv.org/abs/1907.09623), ICML2020. 84 | - George Tucker and Jonathan Lee. [Improved Estimator Selection for Off-Policy Evaluation](https://lyang36.github.io/icml2021_rltheory/camera_ready/79.pdf), Workshop on Reinforcement Learning 85 | Theory at ICML2021. 86 | - Yu-Xiang Wang, Alekh Agarwal, Miroslav Dudik. [Optimal and Adaptive Off-policy Evaluation in Contextual Bandits](https://arxiv.org/abs/1612.01205), ICML2017. 87 | - Miroslav Dudik, John Langford, Lihong Li. [Doubly Robust Policy Evaluation and Learning](https://arxiv.org/abs/1103.4601). ICML2011. 88 | - Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita. [Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation](https://arxiv.org/abs/2008.07146). NeurIPS2021 Track on Datasets and Benchmarks. 89 | 90 | -------------------------------------------------------------------------------- /examples/synthetic/conf/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 30 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 100 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 30 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | -------------------------------------------------------------------------------- /images/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/dataset.png -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/logo.png -------------------------------------------------------------------------------- /images/obd_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/obd_stats.png -------------------------------------------------------------------------------- /images/ope_results_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/ope_results_example.png -------------------------------------------------------------------------------- /images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/overview.png -------------------------------------------------------------------------------- /images/recommended_fashion_items.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/images/recommended_fashion_items.png 
-------------------------------------------------------------------------------- /obd/README.md: -------------------------------------------------------------------------------- 1 | # Open Bandit Dataset 2 | 3 | This directory contains the small size (10,000 records for each pair of campaign and behavior policy) version of our data that can be used for running our [quickstart guide](https://github.com/st-tech/zr-obp/blob/master/examples/quickstart/obd.ipynb) and [examples](https://github.com/st-tech/zr-obp/tree/master/examples/obd). 4 | The full size version of our data is available at [https://research.zozo.com/data.html](https://research.zozo.com/data.html). 5 | 6 | 7 | This dataset is released along with the paper: 8 | 9 | Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita.
10 | **Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation**
11 | [https://arxiv.org/abs/2008.07146](https://arxiv.org/abs/2008.07146) 12 | 13 | When using this dataset, please cite the paper with following bibtex: 14 | ``` 15 | @article{saito2020open, 16 | title={Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation}, 17 | author={Saito, Yuta and Shunsuke, Aihara and Megumi, Matsutani and Yusuke, Narita}, 18 | journal={arXiv preprint arXiv:2008.07146}, 19 | year={2020} 20 | } 21 | ``` 22 | 23 | ## Data description 24 | Open Bandit Dataset is constructed in an A/B test of two multi-armed bandit policies on a large-scale fashion e-commerce platform, [ZOZOTOWN](https://zozo.jp/). 25 | It currently consists of a total of about 26M rows, each one representing a user impression with some feature values, selected items as actions, true propensity scores, and click indicators as an outcome. 26 | This is especially suitable for evaluating *off-policy evaluation* (OPE), which aims to estimate the counterfactual performance of hypothetical algorithms using data generated by a different algorithm. 27 | 28 | 29 | ## Fields 30 | Here is a detailed description of the fields (they are comma-separated in the CSV files): 31 | 32 | **{behavior_policy}/{campaign}.csv** (behavior_policy in (bts, random), campaign in (all, men, women)) 33 | - timestamp: timestamps of impressions. 34 | - item_id: index of items as arms (index ranges from 0-79 in "All" campaign, 0-33 for "Men" campaign, and 0-45 "Women" campaign). 35 | - position: the position of an item being recommended (1, 2, or 3 correspond to left, center, and right position of the ZOZOTOWN recommendation interface, respectively). 36 | - click: target variable that indicates if an item was clicked (1) or not (0). 37 | - action_prob: the probability of an item being recommended at the given position. 38 | - user_features: user-related feature values. 39 | - user_item_affinity: user-item affinity scores induced by the number of past clicks observed between each user-item pair. 40 | 41 |
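
The raw CSVs can also be inspected directly, without going through the pipeline's data loader. A rough sketch with pandas (run from the repository root so the relative path resolves; the column names follow the field description above):

```python
import pandas as pd

# small-sized log collected by the Random policy in the "All" campaign
df = pd.read_csv("obd/random/all/all.csv")
print(df.shape)             # (number of logged impressions, number of columns)
print(df.columns.tolist())  # timestamp, item_id, position, click, action_prob, features, ...
print(df["click"].mean())   # overall click rate under the Random policy
```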

42 | *(figure: Structure of Open Bandit Dataset)* 43 | 44 | 45 | 46 | 47 | 48 |
49 | 50 | **item_context.csv** 51 | - item_id: index of items as arms (index ranges from 0-80 in "All" campaign, 0-33 for "Men" campaign, and 0-46 "Women" campaign). 52 | - item feature 0-3: item related feature values 53 | 54 | 55 | Note that user and item features are now anonymized using a hash function. 56 | 57 | ## Contact 58 | For any question, feel free to contact: 59 | 60 | - The authors of the paper: saito@hanjuku-kaso.com 61 | - ZOZO Research: zozo-research@zozo.com 62 | -------------------------------------------------------------------------------- /obd/README_JN.md: -------------------------------------------------------------------------------- 1 | # Open Bandit Dataset 2 | 3 | このディレクトリには, [実装例](https://github.com/st-tech/zr-obp/tree/master/examples)を実行するための少量(キャンペーンと行動ポリシーのペアごとに10,000レコード)のデータが含まれています. フルサイズ版のデータは[https://research.zozo.com/data.html](https://research.zozo.com/data.html)にて公開されています. 4 | 5 | この公開データセットに関する詳細な記述は以下の論文を参照してください: 6 | 7 | Yuta Saito, Shunsuke Aihara, Megumi Matsutani, Yusuke Narita.
8 | **Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation**
9 | [https://arxiv.org/abs/2008.07146](https://arxiv.org/abs/2008.07146)
10 |
11 | ## データセットの概要
12 | Open Bandit Datasetは, 大規模ファッションECサイト[ZOZOTOWN](https://zozo.jp/)において, 2つの多腕バンディット方策のA/Bテストによって構築されたものです. 現在のログデータ数は合計2600万以上であり, それぞれのデータは特徴量・方策によって選択されたファッションアイテム・真の傾向スコア・クリック有無ラベルによって構成されます. このデータセットは, 別のアルゴリズムによって生成されたデータを用いて反実仮想アルゴリズムの性能を予測するオフ方策評価 (off-policy evaluation)の性能を評価するのに特に適しています.
13 |
14 |
15 | ## 構成
16 | データセットの構成要素の詳細は以下の通りです.
17 |
18 | **{behavior_policy}/{campaign}.csv** (behavior_policy in (bts, random), campaign in (all, men, women))
19 | - timestamp: インプレッションのタイムスタンプ.
20 | - item_id: アイテムのインデックス(インデックスの範囲は「すべて」キャンペーンでは0~79, 「男性」キャンペーンでは0~33, 「女性」キャンペーンでは0~45).
21 | - position: 推薦されるアイテムの位置(1, 2, 3はそれぞれ[ZOZOTOWNの推薦インターフェース](../images/recommended_fashion_items.png)の左, 中央, 右の位置に対応).
22 | - click: アイテムがクリックされたか(1), されなかったか(0)を示す2値目的変数.
23 | - action_prob: 与えられたpositionにアイテムが推薦された際に計算された推薦確率 (傾向スコア).
24 | - user_features: ユーザーに関連する特徴量. 匿名化の目的でハッシュ化されている.
25 | - user_item_affinity: それぞれのユーザとアイテムのペア間で観測された過去のクリック数に応じた関連度特徴量.
26 |
27 | **item_context.csv**
28 | - item_id: アイテムのインデックス(インデックスの範囲は, 「すべて」キャンペーンでは0~79, 「男性」キャンペーンでは0~33, 「女性」キャンペーンでは0~45).
29 | - item feature 0-3: アイテムに関連する特徴量.
30 |
31 |

32 | *図: Open Bandit Datasetの構成*
39 | 40 | なお, user featureとitem featureのそれぞれが何を表すかについては、現在公表されておりません. 41 | また, それぞれのfeatureの値は, ハッシュ関数を用いて匿名化されています. 42 | 43 | ## 連絡 44 | データセットに関する質問等は, 次のメールアドレスにご連絡いただくようお願いいたします: 45 | 46 | - 論文の著者: saito@hanjuku-kaso.com 47 | - ZOZO研究所: zozo-research@zozo.com 48 | -------------------------------------------------------------------------------- /obd/bts/men/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.6771831139635117,c82d13885d8bf7a3b8b9fa6f0842ba60,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 3 | 1,1,-0.7202996418188664,77490d05a721c6d93edf580642ffd8bd,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 4 | 2,2,0.7456623052631924,77490d05a721c6d93edf580642ffd8bd,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 5 | 3,3,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,ff2de7df709624e5b79199b850382ea0,68f8b5168b2a322db725a6cd6f5c900b 6 | 4,4,1.6511093902256406,61a525de9976c0f3fa29d400caf26c56,ee987234ffe4f3d901846ac3f7417738,7a0c97ee71eb7985bd0a6271ce57cec5 7 | 5,5,0.14203091528822703,61a525de9976c0f3fa29d400caf26c56,bb7caf7f0c11f7827fb23b331777b871,8ea65bc866b36a8f00ae913e0c3acc29 8 | 6,6,1.6511093902256406,c82d13885d8bf7a3b8b9fa6f0842ba60,818dfe387422471f09a34db693a78212,7a0c97ee71eb7985bd0a6271ce57cec5 9 | 7,7,2.8583721701755715,61a525de9976c0f3fa29d400caf26c56,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 10 | 8,8,1.349293695238158,61a525de9976c0f3fa29d400caf26c56,7daaf8717f83289266063b6cc1728087,7a0c97ee71eb7985bd0a6271ce57cec5 11 | 9,9,1.1983858477444165,135f410ec21307919cd92df77f1e2a36,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 12 | 10,10,1.5864345984426087,135f410ec21307919cd92df77f1e2a36,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 13 | 11,11,0.44384661027570976,c82d13885d8bf7a3b8b9fa6f0842ba60,24ea3b3a472c51dd6299ebdfb220a55f,0c3b42b13b5a49fcb746da9f60e63717 14 | 12,12,1.1983858477444165,c82d13885d8bf7a3b8b9fa6f0842ba60,0e077f97ef2dcda0dc404f873fc5f96c,7a0c97ee71eb7985bd0a6271ce57cec5 15 | 13,13,0.6163127216971285,135f410ec21307919cd92df77f1e2a36,0e077f97ef2dcda0dc404f873fc5f96c,7a0c97ee71eb7985bd0a6271ce57cec5 16 | 14,14,-1.000557072878672,135f410ec21307919cd92df77f1e2a36,865945b5265169a2176a6e5f084ab2eb,8ea65bc866b36a8f00ae913e0c3acc29 17 | 15,15,-0.37536741897602904,c82d13885d8bf7a3b8b9fa6f0842ba60,786ff5d72b02d1e68a43508d9579977d,68f8b5168b2a322db725a6cd6f5c900b 18 | 16,16,-0.5909500582528024,c82d13885d8bf7a3b8b9fa6f0842ba60,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 19 | 17,17,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 20 | 18,18,-0.9143240171679625,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 21 | 19,19,-0.7634161696742211,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 22 | 20,20,-0.6125083221804798,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 23 | 21,21,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,ff2de7df709624e5b79199b850382ea0,68f8b5168b2a322db725a6cd6f5c900b 24 | 22,22,-0.6987413778911891,17ef71cb22e550d31e5eaa4d629c4abd,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 25 | 
23,23,-0.5693917943251251,e1b1451d555c82a01874347dbecdfeae,01b306b40a448bff555c06d5d72c0171,7a0c97ee71eb7985bd0a6271ce57cec5 26 | 24,24,0.4222883463480324,f15de9aa508214df06454736b488717c,7daaf8717f83289266063b6cc1728087,7a0c97ee71eb7985bd0a6271ce57cec5 27 | 25,25,-0.4616004746867384,135f410ec21307919cd92df77f1e2a36,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 28 | 26,26,0.8965701527569339,77490d05a721c6d93edf580642ffd8bd,746facf4548f3da6d628b8e35bf9e6ec,7a0c97ee71eb7985bd0a6271ce57cec5 29 | 27,27,-0.8496492253849305,17ef71cb22e550d31e5eaa4d629c4abd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 30 | 28,28,-1.0652318646617038,17ef71cb22e550d31e5eaa4d629c4abd,a46137fea33ac48f0809591a76630ea5,68f8b5168b2a322db725a6cd6f5c900b 31 | 29,29,-0.8496492253849305,17ef71cb22e550d31e5eaa4d629c4abd,008dc8758000efaf5b318227fcb71f8d,8ea65bc866b36a8f00ae913e0c3acc29 32 | 30,30,-0.9143240171679625,17ef71cb22e550d31e5eaa4d629c4abd,865945b5265169a2176a6e5f084ab2eb,8ea65bc866b36a8f00ae913e0c3acc29 33 | 31,31,-0.4616004746867384,e1b1451d555c82a01874347dbecdfeae,008dc8758000efaf5b318227fcb71f8d,8ea65bc866b36a8f00ae913e0c3acc29 34 | 32,32,-0.5262752664697704,f15de9aa508214df06454736b488717c,a46137fea33ac48f0809591a76630ea5,68f8b5168b2a322db725a6cd6f5c900b 35 | 33,33,-0.6125083221804798,f15de9aa508214df06454736b488717c,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 36 | -------------------------------------------------------------------------------- /obd/bts/women/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.3701057045375884,37784fea97b5827eeaf4a23dbff98b73,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 3 | 1,1,0.5251956676347125,3220392a73f0fb73e5509a3f6b89ae64,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 4 | 2,2,-0.13450008028171972,1f0bd59babc615f7876d70abd81b0703,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 5 | 3,3,-0.5350296415166964,37784fea97b5827eeaf4a23dbff98b73,30e4f82eec0c5210c403aab8007a5881,2951c610187f9e9e8281ecd31a156bd1 6 | 4,4,-0.25230289240965403,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 7 | 5,5,0.03042385669738834,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 8 | 6,6,-0.13450008028171972,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 9 | 7,7,-0.8436730092918844,54130721ea2331736ec3cd62c6ff2a0a,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 10 | 8,8,-0.8436730092918844,54130721ea2331736ec3cd62c6ff2a0a,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 11 | 9,9,-0.6080673850360158,54130721ea2331736ec3cd62c6ff2a0a,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 12 | 10,10,-0.6080673850360158,54130721ea2331736ec3cd62c6ff2a0a,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 13 | 11,11,0.36027173065560447,e88594e2095dc09c70763bd14b6bb16e,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 14 | 12,12,0.36027173065560447,e88594e2095dc09c70763bd14b6bb16e,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 15 | 13,13,2.9990547223213335,37784fea97b5827eeaf4a23dbff98b73,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 16 | 
14,14,1.3498153525302528,1f0bd59babc615f7876d70abd81b0703,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 17 | 15,15,-0.25230289240965403,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 18 | 16,16,0.6901196046138206,1f0bd59babc615f7876d70abd81b0703,40b2c280a2676cf7e83a2c19a333d4a2,7ce347fef632da56f7d0cd2e3d96c9d2 19 | 17,17,2.339358974404901,1f0bd59babc615f7876d70abd81b0703,5e32ca87b332cb657386052c2962f06f,e6dceba864edcc7bf60d38616a52a13d 20 | 18,18,-0.39366626696317525,3220392a73f0fb73e5509a3f6b89ae64,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 21 | 19,19,-0.8648775154749125,37784fea97b5827eeaf4a23dbff98b73,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 22 | 20,20,0.8550435415929286,e88594e2095dc09c70763bd14b6bb16e,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 23 | 21,21,-0.6292718912190439,1f0bd59babc615f7876d70abd81b0703,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 24 | 22,22,0.8550435415929286,e88594e2095dc09c70763bd14b6bb16e,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 25 | 23,23,-0.2994240172608278,37784fea97b5827eeaf4a23dbff98b73,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 26 | 24,24,-0.7235141409213913,3220392a73f0fb73e5509a3f6b89ae64,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 27 | 25,25,-0.95911976517726,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 28 | 26,26,-0.6292718912190439,980e8ad619a60423e616b67cfb8e09b9,a164a8f4dbd09847e25a3956e12bccff,360f242a6660cf5ee5249dc3c197fe62 29 | 27,27,-0.6292718912190439,980e8ad619a60423e616b67cfb8e09b9,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 30 | 28,28,0.3367111682300176,72f3f67e8e9907b474c547847f8d5fd3,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 31 | 29,29,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,a3438007435a63dbe0ea33f5a0d1e84a,360f242a6660cf5ee5249dc3c197fe62 32 | 30,30,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 33 | 31,31,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,a3438007435a63dbe0ea33f5a0d1e84a,360f242a6660cf5ee5249dc3c197fe62 34 | 32,32,0.17178723125090953,72f3f67e8e9907b474c547847f8d5fd3,04a71d6c9b0aa3b9e462a6923d1e8393,25e55d04edea9bd0a20aff26ac263414 35 | 33,33,-0.32298457968641464,72f3f67e8e9907b474c547847f8d5fd3,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 36 | 34,34,0.5016351052091257,72f3f67e8e9907b474c547847f8d5fd3,5e32ca87b332cb657386052c2962f06f,e6dceba864edcc7bf60d38616a52a13d 37 | 35,35,-0.5585902039422833,72f3f67e8e9907b474c547847f8d5fd3,b02dadb348cf4ac330bf1d90cb80237e,2951c610187f9e9e8281ecd31a156bd1 38 | 36,36,-0.13450008028171972,3220392a73f0fb73e5509a3f6b89ae64,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 39 | 37,37,-0.39366626696317525,f3a3cc32a3967214164eb2709555b3f7,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 40 | 38,38,-0.3701057045375884,cd7b41b498ea6d9180ad3fd389422c39,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 41 | 39,39,-0.46434795423993586,734fc1b871abffa4db3be9bc16ad80f7,b02dadb348cf4ac330bf1d90cb80237e,2951c610187f9e9e8281ecd31a156bd1 42 | 40,40,4.15352228117509,e88594e2095dc09c70763bd14b6bb16e,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 43 | 
41,41,0.03042385669738834,734fc1b871abffa4db3be9bc16ad80f7,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 44 | 42,42,-0.4879085166655227,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 45 | 43,43,-0.95911976517726,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 46 | 44,44,-0.46434795423993586,980e8ad619a60423e616b67cfb8e09b9,a164a8f4dbd09847e25a3956e12bccff,360f242a6660cf5ee5249dc3c197fe62 47 | 45,45,-0.7941958281981519,980e8ad619a60423e616b67cfb8e09b9,1b433010466b794694fc6f5f29eac0d8,360f242a6660cf5ee5249dc3c197fe62 48 | -------------------------------------------------------------------------------- /obd/random/men/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.6771831139635117,ceca20033d7d36b74dc683ddfb804aa7,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 3 | 1,1,-0.7202996418188664,270de57201b8ec18df9a72ed7ecf20eb,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 4 | 2,2,0.7456623052631924,270de57201b8ec18df9a72ed7ecf20eb,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 5 | 3,3,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,1d8ba92fbaa83078dfe330d66b81e5d6,5cc21cc265333250f10b13783ab06472 6 | 4,4,1.6511093902256406,ca9488139d82dbbf68a4e71fc7fe52f9,f65e8237cca7eb6b12f4f009a28a6f72,14fb049a96497a5deef345c1c38b2467 7 | 5,5,0.14203091528822703,ca9488139d82dbbf68a4e71fc7fe52f9,571216af60c365e6a05e1c33c7041f5f,795091554fd8f6b4a0ca7df81bf50a64 8 | 6,6,1.6511093902256406,ceca20033d7d36b74dc683ddfb804aa7,d56aaef6375c7844851af69b354331ba,14fb049a96497a5deef345c1c38b2467 9 | 7,7,2.8583721701755715,ca9488139d82dbbf68a4e71fc7fe52f9,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 10 | 8,8,1.349293695238158,ca9488139d82dbbf68a4e71fc7fe52f9,ef0257571cb05e9c0bba5446f9cfb0c9,14fb049a96497a5deef345c1c38b2467 11 | 9,9,1.1983858477444165,cb4655bc2d2e54055efefb998883d6fe,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 12 | 10,10,1.5864345984426087,cb4655bc2d2e54055efefb998883d6fe,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 13 | 11,11,0.44384661027570976,ceca20033d7d36b74dc683ddfb804aa7,b1dbb432e49fb71cc3b3e820ff31f3ad,6893a4373a4e271e7f03b7a4bdfde4a3 14 | 12,12,1.1983858477444165,ceca20033d7d36b74dc683ddfb804aa7,09122ea36aaf2a8dff8f089286af7cf3,14fb049a96497a5deef345c1c38b2467 15 | 13,13,0.6163127216971285,cb4655bc2d2e54055efefb998883d6fe,09122ea36aaf2a8dff8f089286af7cf3,14fb049a96497a5deef345c1c38b2467 16 | 14,14,-1.000557072878672,cb4655bc2d2e54055efefb998883d6fe,ec5fb795fb7b3a111ad15e1506487535,795091554fd8f6b4a0ca7df81bf50a64 17 | 15,15,-0.37536741897602904,ceca20033d7d36b74dc683ddfb804aa7,e26d13daee6e371dead874b89752bbbe,5cc21cc265333250f10b13783ab06472 18 | 16,16,-0.5909500582528024,ceca20033d7d36b74dc683ddfb804aa7,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 19 | 17,17,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 20 | 18,18,-0.9143240171679625,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 21 | 19,19,-0.7634161696742211,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 22 | 
20,20,-0.6125083221804798,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 23 | 21,21,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,1d8ba92fbaa83078dfe330d66b81e5d6,5cc21cc265333250f10b13783ab06472 24 | 22,22,-0.6987413778911891,dbb8044a5cc8d79d0e5c3cf996e2d0b9,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 25 | 23,23,-0.5693917943251251,0450516d22e9e70b0ee136549576d0e7,937bfc1b19face0ab0a21dddaeaf19cd,14fb049a96497a5deef345c1c38b2467 26 | 24,24,0.4222883463480324,314759c31d4b75b54dfbbeb887f7bbe8,ef0257571cb05e9c0bba5446f9cfb0c9,14fb049a96497a5deef345c1c38b2467 27 | 25,25,-0.4616004746867384,cb4655bc2d2e54055efefb998883d6fe,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 28 | 26,26,0.8965701527569339,270de57201b8ec18df9a72ed7ecf20eb,ff86755a0252ce6d030f37e89025f60f,14fb049a96497a5deef345c1c38b2467 29 | 27,27,-0.8496492253849305,dbb8044a5cc8d79d0e5c3cf996e2d0b9,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 30 | 28,28,-1.0652318646617038,dbb8044a5cc8d79d0e5c3cf996e2d0b9,3f7cf3ddf1cc36d8310a8c0a48187aa9,5cc21cc265333250f10b13783ab06472 31 | 29,29,-0.8496492253849305,dbb8044a5cc8d79d0e5c3cf996e2d0b9,5adc59d478af904390b1de5af7f33d45,795091554fd8f6b4a0ca7df81bf50a64 32 | 30,30,-0.9143240171679625,dbb8044a5cc8d79d0e5c3cf996e2d0b9,ec5fb795fb7b3a111ad15e1506487535,795091554fd8f6b4a0ca7df81bf50a64 33 | 31,31,-0.4616004746867384,0450516d22e9e70b0ee136549576d0e7,5adc59d478af904390b1de5af7f33d45,795091554fd8f6b4a0ca7df81bf50a64 34 | 32,32,-0.5262752664697704,314759c31d4b75b54dfbbeb887f7bbe8,3f7cf3ddf1cc36d8310a8c0a48187aa9,5cc21cc265333250f10b13783ab06472 35 | 33,33,-0.6125083221804798,314759c31d4b75b54dfbbeb887f7bbe8,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 36 | -------------------------------------------------------------------------------- /obd/random/women/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.3701057045375884,01a0a328db2dd2a2e8d91bc43f204ba7,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 3 | 1,1,0.5251956676347125,dd868ca2c498f3384250f431e7767b34,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 4 | 2,2,-0.13450008028171972,252326b1475c78b26365ebc3430adca2,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 5 | 3,3,-0.5350296415166964,01a0a328db2dd2a2e8d91bc43f204ba7,d549c11ab8eb14045de2100d6ab90c86,6476528092c639c0ea8f74062f3dd1bb 6 | 4,4,-0.25230289240965403,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 7 | 5,5,0.03042385669738834,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 8 | 6,6,-0.13450008028171972,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 9 | 7,7,-0.8436730092918844,2f872b67f01f5f2f85b24eb87e99d52c,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 10 | 8,8,-0.8436730092918844,2f872b67f01f5f2f85b24eb87e99d52c,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 11 | 9,9,-0.6080673850360158,2f872b67f01f5f2f85b24eb87e99d52c,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 12 | 10,10,-0.6080673850360158,2f872b67f01f5f2f85b24eb87e99d52c,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 13 | 
11,11,0.36027173065560447,ef42bd4fa577ce60a5b82b6781a08c64,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 14 | 12,12,0.36027173065560447,ef42bd4fa577ce60a5b82b6781a08c64,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 15 | 13,13,2.9990547223213335,01a0a328db2dd2a2e8d91bc43f204ba7,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 16 | 14,14,1.3498153525302528,252326b1475c78b26365ebc3430adca2,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 17 | 15,15,-0.25230289240965403,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 18 | 16,16,0.6901196046138206,252326b1475c78b26365ebc3430adca2,0409e7011c80bccc0ff6442a03d05b29,c395d5f54cf50e223953258801be2697 19 | 17,17,2.339358974404901,252326b1475c78b26365ebc3430adca2,836017345da8a6725b8eed235c5ec3d0,75b8605bfcb7433d5bd178b3a0a2d38c 20 | 18,18,-0.39366626696317525,dd868ca2c498f3384250f431e7767b34,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 21 | 19,19,-0.8648775154749125,01a0a328db2dd2a2e8d91bc43f204ba7,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 22 | 20,20,0.8550435415929286,ef42bd4fa577ce60a5b82b6781a08c64,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 23 | 21,21,-0.6292718912190439,252326b1475c78b26365ebc3430adca2,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 24 | 22,22,0.8550435415929286,ef42bd4fa577ce60a5b82b6781a08c64,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 25 | 23,23,-0.2994240172608278,01a0a328db2dd2a2e8d91bc43f204ba7,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 26 | 24,24,-0.7235141409213913,dd868ca2c498f3384250f431e7767b34,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 27 | 25,25,-0.95911976517726,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 28 | 26,26,-0.6292718912190439,4c508776e494a9f4bc302b34fdc6e76e,683522ca22eaee449c5ac25c2a84ee52,465917095d1b8b7359e781ee782c2c26 29 | 27,27,-0.6292718912190439,4c508776e494a9f4bc302b34fdc6e76e,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 30 | 28,28,0.3367111682300176,de083a9403b58424cb3834909131a6de,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 31 | 29,29,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e98a5a6ce8eca89f5d9084dee8079f60,465917095d1b8b7359e781ee782c2c26 32 | 30,30,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 33 | 31,31,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e98a5a6ce8eca89f5d9084dee8079f60,465917095d1b8b7359e781ee782c2c26 34 | 32,32,0.17178723125090953,de083a9403b58424cb3834909131a6de,75fb3fbc11695c908a1397f96079949b,7ab06c804ac515866a347cb9a54bf2c8 35 | 33,33,-0.32298457968641464,de083a9403b58424cb3834909131a6de,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 36 | 34,34,0.5016351052091257,de083a9403b58424cb3834909131a6de,836017345da8a6725b8eed235c5ec3d0,75b8605bfcb7433d5bd178b3a0a2d38c 37 | 35,35,-0.5585902039422833,de083a9403b58424cb3834909131a6de,7e7fdf8c70a61405fea41ab1bf7cca25,6476528092c639c0ea8f74062f3dd1bb 38 | 36,36,-0.13450008028171972,dd868ca2c498f3384250f431e7767b34,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 39 | 37,37,-0.39366626696317525,5c1e1f8eb530ea4363c04483cd523ac4,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 40 | 
38,38,-0.3701057045375884,3f7aceec173a91029fead403c0fa4bc9,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 41 | 39,39,-0.46434795423993586,a37dab32ea544e235487fb30dc1b29f1,7e7fdf8c70a61405fea41ab1bf7cca25,6476528092c639c0ea8f74062f3dd1bb 42 | 40,40,4.15352228117509,ef42bd4fa577ce60a5b82b6781a08c64,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 43 | 41,41,0.03042385669738834,a37dab32ea544e235487fb30dc1b29f1,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 44 | 42,42,-0.4879085166655227,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 45 | 43,43,-0.95911976517726,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 46 | 44,44,-0.46434795423993586,4c508776e494a9f4bc302b34fdc6e76e,683522ca22eaee449c5ac25c2a84ee52,465917095d1b8b7359e781ee782c2c26 47 | 45,45,-0.7941958281981519,4c508776e494a9f4bc302b34fdc6e76e,14692cff9f8196fb8846653310d39719,465917095d1b8b7359e781ee782c2c26 48 | -------------------------------------------------------------------------------- /obp/__init__.py: -------------------------------------------------------------------------------- 1 | from obp import dataset 2 | from obp import ope 3 | from obp import policy 4 | from obp import simulator 5 | from obp import types 6 | from obp import utils 7 | from obp.version import __version__ # noqa 8 | 9 | 10 | __all__ = ["dataset", "ope", "policy", "simulator", "types", "utils", "version"] 11 | -------------------------------------------------------------------------------- /obp/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from obp.dataset.base import BaseBanditDataset 2 | from obp.dataset.base import BaseRealBanditDataset 3 | from obp.dataset.multiclass import MultiClassToBanditReduction 4 | from obp.dataset.real import OpenBanditDataset 5 | from obp.dataset.synthetic import linear_behavior_policy 6 | from obp.dataset.synthetic import linear_reward_function 7 | from obp.dataset.synthetic import logistic_polynomial_reward_function 8 | from obp.dataset.synthetic import logistic_reward_function 9 | from obp.dataset.synthetic import logistic_sparse_reward_function 10 | from obp.dataset.synthetic import polynomial_behavior_policy 11 | from obp.dataset.synthetic import polynomial_reward_function 12 | from obp.dataset.synthetic import sparse_reward_function 13 | from obp.dataset.synthetic import SyntheticBanditDataset 14 | from obp.dataset.synthetic_continuous import linear_behavior_policy_continuous 15 | from obp.dataset.synthetic_continuous import linear_reward_funcion_continuous 16 | from obp.dataset.synthetic_continuous import linear_synthetic_policy_continuous 17 | from obp.dataset.synthetic_continuous import quadratic_reward_funcion_continuous 18 | from obp.dataset.synthetic_continuous import sign_synthetic_policy_continuous 19 | from obp.dataset.synthetic_continuous import SyntheticContinuousBanditDataset 20 | from obp.dataset.synthetic_continuous import threshold_synthetic_policy_continuous 21 | from obp.dataset.synthetic_embed import SyntheticBanditDatasetWithActionEmbeds 22 | from obp.dataset.synthetic_multi import SyntheticMultiLoggersBanditDataset 23 | from obp.dataset.synthetic_slate import action_interaction_reward_function 24 | from obp.dataset.synthetic_slate import linear_behavior_policy_logit 25 | from obp.dataset.synthetic_slate import SyntheticSlateBanditDataset 26 | 27 | 28 | __all__ = [ 29 | 
"BaseBanditDataset", 30 | "BaseRealBanditDataset", 31 | "OpenBanditDataset", 32 | "SyntheticBanditDataset", 33 | "logistic_reward_function", 34 | "logistic_polynomial_reward_function", 35 | "logistic_sparse_reward_function", 36 | "linear_reward_function", 37 | "polynomial_reward_function", 38 | "sparse_reward_function", 39 | "linear_behavior_policy", 40 | "polynomial_behavior_policy", 41 | "MultiClassToBanditReduction", 42 | "SyntheticContinuousBanditDataset", 43 | "linear_reward_funcion_continuous", 44 | "quadratic_reward_funcion_continuous", 45 | "linear_behavior_policy_continuous", 46 | "linear_synthetic_policy_continuous", 47 | "threshold_synthetic_policy_continuous", 48 | "sign_synthetic_policy_continuous", 49 | "SyntheticSlateBanditDataset", 50 | "action_interaction_reward_function", 51 | "linear_behavior_policy_logit", 52 | "SyntheticBanditDatasetWithActionEmbeds", 53 | "SyntheticMultiLoggersBanditDataset", 54 | ] 55 | -------------------------------------------------------------------------------- /obp/dataset/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Yuta Saito, Yusuke Narita, and ZOZO Technologies, Inc. All rights reserved. 2 | # Licensed under the Apache 2.0 License. 3 | 4 | """Abstract Base Class for Logged Bandit Feedback.""" 5 | from abc import ABCMeta 6 | from abc import abstractmethod 7 | 8 | 9 | class BaseBanditDataset(metaclass=ABCMeta): 10 | """Base Class for Synthetic Bandit Dataset.""" 11 | 12 | @abstractmethod 13 | def obtain_batch_bandit_feedback(self) -> None: 14 | """Obtain batch logged bandit data.""" 15 | raise NotImplementedError 16 | 17 | 18 | class BaseRealBanditDataset(BaseBanditDataset): 19 | """Base Class for Real-World Bandit Dataset.""" 20 | 21 | @abstractmethod 22 | def load_raw_data(self) -> None: 23 | """Load raw dataset.""" 24 | raise NotImplementedError 25 | 26 | @abstractmethod 27 | def pre_process(self) -> None: 28 | """Preprocess raw dataset.""" 29 | raise NotImplementedError 30 | -------------------------------------------------------------------------------- /obp/dataset/obd/bts/men/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.6771831139635117,c82d13885d8bf7a3b8b9fa6f0842ba60,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 3 | 1,1,-0.7202996418188664,77490d05a721c6d93edf580642ffd8bd,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 4 | 2,2,0.7456623052631924,77490d05a721c6d93edf580642ffd8bd,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 5 | 3,3,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,ff2de7df709624e5b79199b850382ea0,68f8b5168b2a322db725a6cd6f5c900b 6 | 4,4,1.6511093902256406,61a525de9976c0f3fa29d400caf26c56,ee987234ffe4f3d901846ac3f7417738,7a0c97ee71eb7985bd0a6271ce57cec5 7 | 5,5,0.14203091528822703,61a525de9976c0f3fa29d400caf26c56,bb7caf7f0c11f7827fb23b331777b871,8ea65bc866b36a8f00ae913e0c3acc29 8 | 6,6,1.6511093902256406,c82d13885d8bf7a3b8b9fa6f0842ba60,818dfe387422471f09a34db693a78212,7a0c97ee71eb7985bd0a6271ce57cec5 9 | 7,7,2.8583721701755715,61a525de9976c0f3fa29d400caf26c56,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 10 | 8,8,1.349293695238158,61a525de9976c0f3fa29d400caf26c56,7daaf8717f83289266063b6cc1728087,7a0c97ee71eb7985bd0a6271ce57cec5 11 | 
9,9,1.1983858477444165,135f410ec21307919cd92df77f1e2a36,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 12 | 10,10,1.5864345984426087,135f410ec21307919cd92df77f1e2a36,bfcce809dad48aadc7fcbe714f9eabd7,7a0c97ee71eb7985bd0a6271ce57cec5 13 | 11,11,0.44384661027570976,c82d13885d8bf7a3b8b9fa6f0842ba60,24ea3b3a472c51dd6299ebdfb220a55f,0c3b42b13b5a49fcb746da9f60e63717 14 | 12,12,1.1983858477444165,c82d13885d8bf7a3b8b9fa6f0842ba60,0e077f97ef2dcda0dc404f873fc5f96c,7a0c97ee71eb7985bd0a6271ce57cec5 15 | 13,13,0.6163127216971285,135f410ec21307919cd92df77f1e2a36,0e077f97ef2dcda0dc404f873fc5f96c,7a0c97ee71eb7985bd0a6271ce57cec5 16 | 14,14,-1.000557072878672,135f410ec21307919cd92df77f1e2a36,865945b5265169a2176a6e5f084ab2eb,8ea65bc866b36a8f00ae913e0c3acc29 17 | 15,15,-0.37536741897602904,c82d13885d8bf7a3b8b9fa6f0842ba60,786ff5d72b02d1e68a43508d9579977d,68f8b5168b2a322db725a6cd6f5c900b 18 | 16,16,-0.5909500582528024,c82d13885d8bf7a3b8b9fa6f0842ba60,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 19 | 17,17,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 20 | 18,18,-0.9143240171679625,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 21 | 19,19,-0.7634161696742211,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 22 | 20,20,-0.6125083221804798,77490d05a721c6d93edf580642ffd8bd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 23 | 21,21,-0.6987413778911891,135f410ec21307919cd92df77f1e2a36,ff2de7df709624e5b79199b850382ea0,68f8b5168b2a322db725a6cd6f5c900b 24 | 22,22,-0.6987413778911891,17ef71cb22e550d31e5eaa4d629c4abd,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 25 | 23,23,-0.5693917943251251,e1b1451d555c82a01874347dbecdfeae,01b306b40a448bff555c06d5d72c0171,7a0c97ee71eb7985bd0a6271ce57cec5 26 | 24,24,0.4222883463480324,f15de9aa508214df06454736b488717c,7daaf8717f83289266063b6cc1728087,7a0c97ee71eb7985bd0a6271ce57cec5 27 | 25,25,-0.4616004746867384,135f410ec21307919cd92df77f1e2a36,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 28 | 26,26,0.8965701527569339,77490d05a721c6d93edf580642ffd8bd,746facf4548f3da6d628b8e35bf9e6ec,7a0c97ee71eb7985bd0a6271ce57cec5 29 | 27,27,-0.8496492253849305,17ef71cb22e550d31e5eaa4d629c4abd,32338184693488f3a469822fd0a08387,68f8b5168b2a322db725a6cd6f5c900b 30 | 28,28,-1.0652318646617038,17ef71cb22e550d31e5eaa4d629c4abd,a46137fea33ac48f0809591a76630ea5,68f8b5168b2a322db725a6cd6f5c900b 31 | 29,29,-0.8496492253849305,17ef71cb22e550d31e5eaa4d629c4abd,008dc8758000efaf5b318227fcb71f8d,8ea65bc866b36a8f00ae913e0c3acc29 32 | 30,30,-0.9143240171679625,17ef71cb22e550d31e5eaa4d629c4abd,865945b5265169a2176a6e5f084ab2eb,8ea65bc866b36a8f00ae913e0c3acc29 33 | 31,31,-0.4616004746867384,e1b1451d555c82a01874347dbecdfeae,008dc8758000efaf5b318227fcb71f8d,8ea65bc866b36a8f00ae913e0c3acc29 34 | 32,32,-0.5262752664697704,f15de9aa508214df06454736b488717c,a46137fea33ac48f0809591a76630ea5,68f8b5168b2a322db725a6cd6f5c900b 35 | 33,33,-0.6125083221804798,f15de9aa508214df06454736b488717c,088abf8a8657959e46ac19af8da80d15,8ea65bc866b36a8f00ae913e0c3acc29 36 | -------------------------------------------------------------------------------- /obp/dataset/obd/bts/women/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 
0,0,-0.3701057045375884,37784fea97b5827eeaf4a23dbff98b73,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 3 | 1,1,0.5251956676347125,3220392a73f0fb73e5509a3f6b89ae64,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 4 | 2,2,-0.13450008028171972,1f0bd59babc615f7876d70abd81b0703,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 5 | 3,3,-0.5350296415166964,37784fea97b5827eeaf4a23dbff98b73,30e4f82eec0c5210c403aab8007a5881,2951c610187f9e9e8281ecd31a156bd1 6 | 4,4,-0.25230289240965403,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 7 | 5,5,0.03042385669738834,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 8 | 6,6,-0.13450008028171972,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 9 | 7,7,-0.8436730092918844,54130721ea2331736ec3cd62c6ff2a0a,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 10 | 8,8,-0.8436730092918844,54130721ea2331736ec3cd62c6ff2a0a,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 11 | 9,9,-0.6080673850360158,54130721ea2331736ec3cd62c6ff2a0a,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 12 | 10,10,-0.6080673850360158,54130721ea2331736ec3cd62c6ff2a0a,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 13 | 11,11,0.36027173065560447,e88594e2095dc09c70763bd14b6bb16e,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 14 | 12,12,0.36027173065560447,e88594e2095dc09c70763bd14b6bb16e,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 15 | 13,13,2.9990547223213335,37784fea97b5827eeaf4a23dbff98b73,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 16 | 14,14,1.3498153525302528,1f0bd59babc615f7876d70abd81b0703,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 17 | 15,15,-0.25230289240965403,3220392a73f0fb73e5509a3f6b89ae64,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 18 | 16,16,0.6901196046138206,1f0bd59babc615f7876d70abd81b0703,40b2c280a2676cf7e83a2c19a333d4a2,7ce347fef632da56f7d0cd2e3d96c9d2 19 | 17,17,2.339358974404901,1f0bd59babc615f7876d70abd81b0703,5e32ca87b332cb657386052c2962f06f,e6dceba864edcc7bf60d38616a52a13d 20 | 18,18,-0.39366626696317525,3220392a73f0fb73e5509a3f6b89ae64,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 21 | 19,19,-0.8648775154749125,37784fea97b5827eeaf4a23dbff98b73,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 22 | 20,20,0.8550435415929286,e88594e2095dc09c70763bd14b6bb16e,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 23 | 21,21,-0.6292718912190439,1f0bd59babc615f7876d70abd81b0703,c72dd70c97975aad5865c138b5c3c501,360f242a6660cf5ee5249dc3c197fe62 24 | 22,22,0.8550435415929286,e88594e2095dc09c70763bd14b6bb16e,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 25 | 23,23,-0.2994240172608278,37784fea97b5827eeaf4a23dbff98b73,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 26 | 24,24,-0.7235141409213913,3220392a73f0fb73e5509a3f6b89ae64,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 27 | 25,25,-0.95911976517726,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 28 | 26,26,-0.6292718912190439,980e8ad619a60423e616b67cfb8e09b9,a164a8f4dbd09847e25a3956e12bccff,360f242a6660cf5ee5249dc3c197fe62 29 | 
27,27,-0.6292718912190439,980e8ad619a60423e616b67cfb8e09b9,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 30 | 28,28,0.3367111682300176,72f3f67e8e9907b474c547847f8d5fd3,92b8c61a1a556299172a5705f5a927db,360f242a6660cf5ee5249dc3c197fe62 31 | 29,29,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,a3438007435a63dbe0ea33f5a0d1e84a,360f242a6660cf5ee5249dc3c197fe62 32 | 30,30,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 33 | 31,31,-0.2994240172608278,980e8ad619a60423e616b67cfb8e09b9,a3438007435a63dbe0ea33f5a0d1e84a,360f242a6660cf5ee5249dc3c197fe62 34 | 32,32,0.17178723125090953,72f3f67e8e9907b474c547847f8d5fd3,04a71d6c9b0aa3b9e462a6923d1e8393,25e55d04edea9bd0a20aff26ac263414 35 | 33,33,-0.32298457968641464,72f3f67e8e9907b474c547847f8d5fd3,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 36 | 34,34,0.5016351052091257,72f3f67e8e9907b474c547847f8d5fd3,5e32ca87b332cb657386052c2962f06f,e6dceba864edcc7bf60d38616a52a13d 37 | 35,35,-0.5585902039422833,72f3f67e8e9907b474c547847f8d5fd3,b02dadb348cf4ac330bf1d90cb80237e,2951c610187f9e9e8281ecd31a156bd1 38 | 36,36,-0.13450008028171972,3220392a73f0fb73e5509a3f6b89ae64,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 39 | 37,37,-0.39366626696317525,f3a3cc32a3967214164eb2709555b3f7,5e886301a0b2b816d35a1209d156acdd,2951c610187f9e9e8281ecd31a156bd1 40 | 38,38,-0.3701057045375884,cd7b41b498ea6d9180ad3fd389422c39,d746b3324d88d353b5e1b82780f4d180,680f9d18b8f0ffa4633d41a7738c3c57 41 | 39,39,-0.46434795423993586,734fc1b871abffa4db3be9bc16ad80f7,b02dadb348cf4ac330bf1d90cb80237e,2951c610187f9e9e8281ecd31a156bd1 42 | 40,40,4.15352228117509,e88594e2095dc09c70763bd14b6bb16e,cedae94e7ca42bac679afaf582fda539,e6dceba864edcc7bf60d38616a52a13d 43 | 41,41,0.03042385669738834,734fc1b871abffa4db3be9bc16ad80f7,e1f84cd2715873f04359fb55b370f328,9ef568e90bed3b76bc560e33435f7c1d 44 | 42,42,-0.4879085166655227,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 45 | 43,43,-0.95911976517726,980e8ad619a60423e616b67cfb8e09b9,9dc794e30838384fe6068c9636d35d39,360f242a6660cf5ee5249dc3c197fe62 46 | 44,44,-0.46434795423993586,980e8ad619a60423e616b67cfb8e09b9,a164a8f4dbd09847e25a3956e12bccff,360f242a6660cf5ee5249dc3c197fe62 47 | 45,45,-0.7941958281981519,980e8ad619a60423e616b67cfb8e09b9,1b433010466b794694fc6f5f29eac0d8,360f242a6660cf5ee5249dc3c197fe62 48 | -------------------------------------------------------------------------------- /obp/dataset/obd/random/men/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.6771831139635117,ceca20033d7d36b74dc683ddfb804aa7,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 3 | 1,1,-0.7202996418188664,270de57201b8ec18df9a72ed7ecf20eb,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 4 | 2,2,0.7456623052631924,270de57201b8ec18df9a72ed7ecf20eb,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 5 | 3,3,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,1d8ba92fbaa83078dfe330d66b81e5d6,5cc21cc265333250f10b13783ab06472 6 | 4,4,1.6511093902256406,ca9488139d82dbbf68a4e71fc7fe52f9,f65e8237cca7eb6b12f4f009a28a6f72,14fb049a96497a5deef345c1c38b2467 7 | 5,5,0.14203091528822703,ca9488139d82dbbf68a4e71fc7fe52f9,571216af60c365e6a05e1c33c7041f5f,795091554fd8f6b4a0ca7df81bf50a64 8 | 
6,6,1.6511093902256406,ceca20033d7d36b74dc683ddfb804aa7,d56aaef6375c7844851af69b354331ba,14fb049a96497a5deef345c1c38b2467 9 | 7,7,2.8583721701755715,ca9488139d82dbbf68a4e71fc7fe52f9,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 10 | 8,8,1.349293695238158,ca9488139d82dbbf68a4e71fc7fe52f9,ef0257571cb05e9c0bba5446f9cfb0c9,14fb049a96497a5deef345c1c38b2467 11 | 9,9,1.1983858477444165,cb4655bc2d2e54055efefb998883d6fe,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 12 | 10,10,1.5864345984426087,cb4655bc2d2e54055efefb998883d6fe,2ba5916b91fd2d0688459ba79f033a9b,14fb049a96497a5deef345c1c38b2467 13 | 11,11,0.44384661027570976,ceca20033d7d36b74dc683ddfb804aa7,b1dbb432e49fb71cc3b3e820ff31f3ad,6893a4373a4e271e7f03b7a4bdfde4a3 14 | 12,12,1.1983858477444165,ceca20033d7d36b74dc683ddfb804aa7,09122ea36aaf2a8dff8f089286af7cf3,14fb049a96497a5deef345c1c38b2467 15 | 13,13,0.6163127216971285,cb4655bc2d2e54055efefb998883d6fe,09122ea36aaf2a8dff8f089286af7cf3,14fb049a96497a5deef345c1c38b2467 16 | 14,14,-1.000557072878672,cb4655bc2d2e54055efefb998883d6fe,ec5fb795fb7b3a111ad15e1506487535,795091554fd8f6b4a0ca7df81bf50a64 17 | 15,15,-0.37536741897602904,ceca20033d7d36b74dc683ddfb804aa7,e26d13daee6e371dead874b89752bbbe,5cc21cc265333250f10b13783ab06472 18 | 16,16,-0.5909500582528024,ceca20033d7d36b74dc683ddfb804aa7,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 19 | 17,17,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 20 | 18,18,-0.9143240171679625,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 21 | 19,19,-0.7634161696742211,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 22 | 20,20,-0.6125083221804798,270de57201b8ec18df9a72ed7ecf20eb,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 23 | 21,21,-0.6987413778911891,cb4655bc2d2e54055efefb998883d6fe,1d8ba92fbaa83078dfe330d66b81e5d6,5cc21cc265333250f10b13783ab06472 24 | 22,22,-0.6987413778911891,dbb8044a5cc8d79d0e5c3cf996e2d0b9,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 25 | 23,23,-0.5693917943251251,0450516d22e9e70b0ee136549576d0e7,937bfc1b19face0ab0a21dddaeaf19cd,14fb049a96497a5deef345c1c38b2467 26 | 24,24,0.4222883463480324,314759c31d4b75b54dfbbeb887f7bbe8,ef0257571cb05e9c0bba5446f9cfb0c9,14fb049a96497a5deef345c1c38b2467 27 | 25,25,-0.4616004746867384,cb4655bc2d2e54055efefb998883d6fe,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 28 | 26,26,0.8965701527569339,270de57201b8ec18df9a72ed7ecf20eb,ff86755a0252ce6d030f37e89025f60f,14fb049a96497a5deef345c1c38b2467 29 | 27,27,-0.8496492253849305,dbb8044a5cc8d79d0e5c3cf996e2d0b9,03053cdb09aecdd139df91ac8068987d,5cc21cc265333250f10b13783ab06472 30 | 28,28,-1.0652318646617038,dbb8044a5cc8d79d0e5c3cf996e2d0b9,3f7cf3ddf1cc36d8310a8c0a48187aa9,5cc21cc265333250f10b13783ab06472 31 | 29,29,-0.8496492253849305,dbb8044a5cc8d79d0e5c3cf996e2d0b9,5adc59d478af904390b1de5af7f33d45,795091554fd8f6b4a0ca7df81bf50a64 32 | 30,30,-0.9143240171679625,dbb8044a5cc8d79d0e5c3cf996e2d0b9,ec5fb795fb7b3a111ad15e1506487535,795091554fd8f6b4a0ca7df81bf50a64 33 | 31,31,-0.4616004746867384,0450516d22e9e70b0ee136549576d0e7,5adc59d478af904390b1de5af7f33d45,795091554fd8f6b4a0ca7df81bf50a64 34 | 32,32,-0.5262752664697704,314759c31d4b75b54dfbbeb887f7bbe8,3f7cf3ddf1cc36d8310a8c0a48187aa9,5cc21cc265333250f10b13783ab06472 35 | 
33,33,-0.6125083221804798,314759c31d4b75b54dfbbeb887f7bbe8,eb6f942c01859574cb88d2e62bf84354,795091554fd8f6b4a0ca7df81bf50a64 36 | -------------------------------------------------------------------------------- /obp/dataset/obd/random/women/item_context.csv: -------------------------------------------------------------------------------- 1 | ,item_id,item_feature_0,item_feature_1,item_feature_2,item_feature_3 2 | 0,0,-0.3701057045375884,01a0a328db2dd2a2e8d91bc43f204ba7,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 3 | 1,1,0.5251956676347125,dd868ca2c498f3384250f431e7767b34,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 4 | 2,2,-0.13450008028171972,252326b1475c78b26365ebc3430adca2,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 5 | 3,3,-0.5350296415166964,01a0a328db2dd2a2e8d91bc43f204ba7,d549c11ab8eb14045de2100d6ab90c86,6476528092c639c0ea8f74062f3dd1bb 6 | 4,4,-0.25230289240965403,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 7 | 5,5,0.03042385669738834,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 8 | 6,6,-0.13450008028171972,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 9 | 7,7,-0.8436730092918844,2f872b67f01f5f2f85b24eb87e99d52c,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 10 | 8,8,-0.8436730092918844,2f872b67f01f5f2f85b24eb87e99d52c,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 11 | 9,9,-0.6080673850360158,2f872b67f01f5f2f85b24eb87e99d52c,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 12 | 10,10,-0.6080673850360158,2f872b67f01f5f2f85b24eb87e99d52c,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 13 | 11,11,0.36027173065560447,ef42bd4fa577ce60a5b82b6781a08c64,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 14 | 12,12,0.36027173065560447,ef42bd4fa577ce60a5b82b6781a08c64,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 15 | 13,13,2.9990547223213335,01a0a328db2dd2a2e8d91bc43f204ba7,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 16 | 14,14,1.3498153525302528,252326b1475c78b26365ebc3430adca2,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 17 | 15,15,-0.25230289240965403,dd868ca2c498f3384250f431e7767b34,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 18 | 16,16,0.6901196046138206,252326b1475c78b26365ebc3430adca2,0409e7011c80bccc0ff6442a03d05b29,c395d5f54cf50e223953258801be2697 19 | 17,17,2.339358974404901,252326b1475c78b26365ebc3430adca2,836017345da8a6725b8eed235c5ec3d0,75b8605bfcb7433d5bd178b3a0a2d38c 20 | 18,18,-0.39366626696317525,dd868ca2c498f3384250f431e7767b34,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 21 | 19,19,-0.8648775154749125,01a0a328db2dd2a2e8d91bc43f204ba7,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 22 | 20,20,0.8550435415929286,ef42bd4fa577ce60a5b82b6781a08c64,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 23 | 21,21,-0.6292718912190439,252326b1475c78b26365ebc3430adca2,d34d1f81e32b9e570fab69c523409c8d,465917095d1b8b7359e781ee782c2c26 24 | 22,22,0.8550435415929286,ef42bd4fa577ce60a5b82b6781a08c64,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 25 | 23,23,-0.2994240172608278,01a0a328db2dd2a2e8d91bc43f204ba7,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 26 | 
24,24,-0.7235141409213913,dd868ca2c498f3384250f431e7767b34,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 27 | 25,25,-0.95911976517726,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 28 | 26,26,-0.6292718912190439,4c508776e494a9f4bc302b34fdc6e76e,683522ca22eaee449c5ac25c2a84ee52,465917095d1b8b7359e781ee782c2c26 29 | 27,27,-0.6292718912190439,4c508776e494a9f4bc302b34fdc6e76e,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 30 | 28,28,0.3367111682300176,de083a9403b58424cb3834909131a6de,9f43744cf9ae18357d9da7e6b130d3ea,465917095d1b8b7359e781ee782c2c26 31 | 29,29,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e98a5a6ce8eca89f5d9084dee8079f60,465917095d1b8b7359e781ee782c2c26 32 | 30,30,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 33 | 31,31,-0.2994240172608278,4c508776e494a9f4bc302b34fdc6e76e,e98a5a6ce8eca89f5d9084dee8079f60,465917095d1b8b7359e781ee782c2c26 34 | 32,32,0.17178723125090953,de083a9403b58424cb3834909131a6de,75fb3fbc11695c908a1397f96079949b,7ab06c804ac515866a347cb9a54bf2c8 35 | 33,33,-0.32298457968641464,de083a9403b58424cb3834909131a6de,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 36 | 34,34,0.5016351052091257,de083a9403b58424cb3834909131a6de,836017345da8a6725b8eed235c5ec3d0,75b8605bfcb7433d5bd178b3a0a2d38c 37 | 35,35,-0.5585902039422833,de083a9403b58424cb3834909131a6de,7e7fdf8c70a61405fea41ab1bf7cca25,6476528092c639c0ea8f74062f3dd1bb 38 | 36,36,-0.13450008028171972,dd868ca2c498f3384250f431e7767b34,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 39 | 37,37,-0.39366626696317525,5c1e1f8eb530ea4363c04483cd523ac4,e0b3b6a06ee21c261d6937e51fef2f47,6476528092c639c0ea8f74062f3dd1bb 40 | 38,38,-0.3701057045375884,3f7aceec173a91029fead403c0fa4bc9,5eb5c6e65b24468fef997e461cb97425,0d1d6660a28b567ddedbaa991a056feb 41 | 39,39,-0.46434795423993586,a37dab32ea544e235487fb30dc1b29f1,7e7fdf8c70a61405fea41ab1bf7cca25,6476528092c639c0ea8f74062f3dd1bb 42 | 40,40,4.15352228117509,ef42bd4fa577ce60a5b82b6781a08c64,ae34b8819f09a1df5bc36a3ebb2ca7c1,75b8605bfcb7433d5bd178b3a0a2d38c 43 | 41,41,0.03042385669738834,a37dab32ea544e235487fb30dc1b29f1,8378abb75da7f75587cb8cd4b687c929,82f96bb81ce9feeb0c973d24adccc347 44 | 42,42,-0.4879085166655227,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 45 | 43,43,-0.95911976517726,4c508776e494a9f4bc302b34fdc6e76e,e82eb366fa5e481c13b09c24eeeb036d,465917095d1b8b7359e781ee782c2c26 46 | 44,44,-0.46434795423993586,4c508776e494a9f4bc302b34fdc6e76e,683522ca22eaee449c5ac25c2a84ee52,465917095d1b8b7359e781ee782c2c26 47 | 45,45,-0.7941958281981519,4c508776e494a9f4bc302b34fdc6e76e,14692cff9f8196fb8846653310d39719,465917095d1b8b7359e781ee782c2c26 48 | -------------------------------------------------------------------------------- /obp/dataset/reward_type.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class RewardType(enum.Enum): 5 | """Reward type. 6 | 7 | Attributes 8 | ---------- 9 | BINARY: 10 | The reward type is binary. 11 | CONTINUOUS: 12 | The reward type is continuous. 
13 | """ 14 | 15 | BINARY = "binary" 16 | CONTINUOUS = "continuous" 17 | 18 | def __repr__(self) -> str: 19 | 20 | return str(self) 21 | -------------------------------------------------------------------------------- /obp/ope/__init__.py: -------------------------------------------------------------------------------- 1 | from obp.ope.classification_model import ImportanceWeightEstimator 2 | from obp.ope.classification_model import PropensityScoreEstimator 3 | from obp.ope.estimators import BalancedInverseProbabilityWeighting 4 | from obp.ope.estimators import BaseOffPolicyEstimator 5 | from obp.ope.estimators import DirectMethod 6 | from obp.ope.estimators import DoublyRobust 7 | from obp.ope.estimators import DoublyRobustWithShrinkage 8 | from obp.ope.estimators import InverseProbabilityWeighting 9 | from obp.ope.estimators import ReplayMethod 10 | from obp.ope.estimators import SelfNormalizedDoublyRobust 11 | from obp.ope.estimators import SelfNormalizedInverseProbabilityWeighting 12 | from obp.ope.estimators import SubGaussianDoublyRobust 13 | from obp.ope.estimators import SubGaussianInverseProbabilityWeighting 14 | from obp.ope.estimators import SwitchDoublyRobust 15 | from obp.ope.estimators_continuous import ( 16 | KernelizedSelfNormalizedInverseProbabilityWeighting, 17 | ) 18 | from obp.ope.estimators_continuous import BaseContinuousOffPolicyEstimator 19 | from obp.ope.estimators_continuous import cosine_kernel 20 | from obp.ope.estimators_continuous import epanechnikov_kernel 21 | from obp.ope.estimators_continuous import gaussian_kernel 22 | from obp.ope.estimators_continuous import KernelizedDoublyRobust 23 | from obp.ope.estimators_continuous import KernelizedInverseProbabilityWeighting 24 | from obp.ope.estimators_continuous import triangular_kernel 25 | from obp.ope.estimators_embed import ( 26 | SelfNormalizedMarginalizedInverseProbabilityWeighting, 27 | ) 28 | from obp.ope.estimators_embed import MarginalizedInverseProbabilityWeighting 29 | from obp.ope.estimators_multi import BaseMultiLoggersOffPolicyEstimator 30 | from obp.ope.estimators_multi import MultiLoggersBalancedDoublyRobust 31 | from obp.ope.estimators_multi import MultiLoggersBalancedInverseProbabilityWeighting 32 | from obp.ope.estimators_multi import MultiLoggersNaiveDoublyRobust 33 | from obp.ope.estimators_multi import MultiLoggersNaiveInverseProbabilityWeighting 34 | from obp.ope.estimators_multi import MultiLoggersWeightedDoublyRobust 35 | from obp.ope.estimators_multi import MultiLoggersWeightedInverseProbabilityWeighting 36 | from obp.ope.estimators_slate import SelfNormalizedSlateIndependentIPS 37 | from obp.ope.estimators_slate import SelfNormalizedSlateRewardInteractionIPS 38 | from obp.ope.estimators_slate import SelfNormalizedSlateStandardIPS 39 | from obp.ope.estimators_slate import SlateCascadeDoublyRobust 40 | from obp.ope.estimators_slate import SlateIndependentIPS 41 | from obp.ope.estimators_slate import SlateRewardInteractionIPS 42 | from obp.ope.estimators_slate import SlateStandardIPS 43 | from obp.ope.estimators_tuning import DoublyRobustTuning 44 | from obp.ope.estimators_tuning import DoublyRobustWithShrinkageTuning 45 | from obp.ope.estimators_tuning import InverseProbabilityWeightingTuning 46 | from obp.ope.estimators_tuning import SubGaussianDoublyRobustTuning 47 | from obp.ope.estimators_tuning import SubGaussianInverseProbabilityWeightingTuning 48 | from obp.ope.estimators_tuning import SwitchDoublyRobustTuning 49 | from obp.ope.meta import OffPolicyEvaluation 50 | from 
obp.ope.meta_continuous import ContinuousOffPolicyEvaluation 51 | from obp.ope.meta_multi import MultiLoggersOffPolicyEvaluation 52 | from obp.ope.meta_slate import SlateOffPolicyEvaluation 53 | from obp.ope.regression_model import RegressionModel 54 | from obp.ope.regression_model_slate import SlateRegressionModel 55 | 56 | 57 | __all__ = [ 58 | "BaseOffPolicyEstimator", 59 | "ReplayMethod", 60 | "InverseProbabilityWeighting", 61 | "SelfNormalizedInverseProbabilityWeighting", 62 | "DirectMethod", 63 | "DoublyRobust", 64 | "SelfNormalizedDoublyRobust", 65 | "SwitchDoublyRobust", 66 | "DoublyRobustWithShrinkage", 67 | "SubGaussianInverseProbabilityWeighting", 68 | "SubGaussianDoublyRobust", 69 | "InverseProbabilityWeightingTuning", 70 | "DoublyRobustTuning", 71 | "SwitchDoublyRobustTuning", 72 | "DoublyRobustWithShrinkageTuning", 73 | "SubGaussianInverseProbabilityWeightingTuning", 74 | "SubGaussianDoublyRobustTuning", 75 | "MarginalizedInverseProbabilityWeighting", 76 | "SelfNormalizedMarginalizedInverseProbabilityWeighting", 77 | "BaseMultiLoggersOffPolicyEstimator", 78 | "MultiLoggersNaiveInverseProbabilityWeighting", 79 | "MultiLoggersWeightedInverseProbabilityWeighting", 80 | "MultiLoggersBalancedInverseProbabilityWeighting", 81 | "MultiLoggersNaiveDoublyRobust", 82 | "MultiLoggersBalancedDoublyRobust", 83 | "MultiLoggersWeightedDoublyRobust", 84 | "OffPolicyEvaluation", 85 | "SlateOffPolicyEvaluation", 86 | "ContinuousOffPolicyEvaluation", 87 | "MultiLoggersOffPolicyEvaluation", 88 | "RegressionModel", 89 | "SlateRegressionModel", 90 | "SlateStandardIPS", 91 | "SlateIndependentIPS", 92 | "SlateRewardInteractionIPS", 93 | "SlateCascadeDoublyRobust", 94 | "SelfNormalizedSlateRewardInteractionIPS", 95 | "SelfNormalizedSlateIndependentIPS", 96 | "SelfNormalizedSlateStandardIPS", 97 | "BalancedInverseProbabilityWeighting", 98 | "ImportanceWeightEstimator", 99 | "PropensityScoreEstimator", 100 | "BaseContinuousOffPolicyEstimator", 101 | "KernelizedInverseProbabilityWeighting", 102 | "KernelizedSelfNormalizedInverseProbabilityWeighting", 103 | "KernelizedDoublyRobust", 104 | "triangular_kernel", 105 | "gaussian_kernel", 106 | "epanechnikov_kernel", 107 | "cosine_kernel", 108 | ] 109 | 110 | __all_estimators__ = [ 111 | "ReplayMethod", 112 | "InverseProbabilityWeighting", 113 | "SelfNormalizedInverseProbabilityWeighting", 114 | "DirectMethod", 115 | "DoublyRobust", 116 | "DoublyRobustWithShrinkage", 117 | "SwitchDoublyRobust", 118 | "SelfNormalizedDoublyRobust", 119 | "SubGaussianInverseProbabilityWeighting", 120 | "SubGaussianDoublyRobust", 121 | "BalancedInverseProbabilityWeighting", 122 | ] 123 | 124 | 125 | __all_estimators_tuning__ = [ 126 | "InverseProbabilityWeightingTuning", 127 | "DoublyRobustTuning", 128 | "SwitchDoublyRobustTuning", 129 | "DoublyRobustWithShrinkageTuning", 130 | ] 131 | 132 | 133 | __all_estimators_tuning_sg__ = [ 134 | "SubGaussianInverseProbabilityWeightingTuning", 135 | "SubGaussianDoublyRobustTuning", 136 | ] 137 | -------------------------------------------------------------------------------- /obp/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from obp.policy.base import BaseContextFreePolicy 2 | from obp.policy.base import BaseContextualPolicy 3 | from obp.policy.base import BaseContinuousOfflinePolicyLearner 4 | from obp.policy.base import BaseOfflinePolicyLearner 5 | from obp.policy.contextfree import BernoulliTS 6 | from obp.policy.contextfree import EpsilonGreedy 7 | from obp.policy.contextfree 
import Random 8 | from obp.policy.linear import LinEpsilonGreedy 9 | from obp.policy.linear import LinTS 10 | from obp.policy.linear import LinUCB 11 | from obp.policy.logistic import LogisticEpsilonGreedy 12 | from obp.policy.logistic import LogisticTS 13 | from obp.policy.logistic import LogisticUCB 14 | from obp.policy.logistic import MiniBatchLogisticRegression 15 | from obp.policy.offline import IPWLearner 16 | from obp.policy.offline import NNPolicyLearner 17 | from obp.policy.offline import QLearner 18 | from obp.policy.offline_continuous import ContinuousNNPolicyLearner 19 | 20 | 21 | __all__ = [ 22 | "BaseContextFreePolicy", 23 | "BaseContextualPolicy", 24 | "BaseOfflinePolicyLearner", 25 | "BaseContinuousOfflinePolicyLearner", 26 | "EpsilonGreedy", 27 | "Random", 28 | "BernoulliTS", 29 | "LinEpsilonGreedy", 30 | "LinUCB", 31 | "LinTS", 32 | "LogisticEpsilonGreedy", 33 | "LogisticUCB", 34 | "LogisticTS", 35 | "MiniBatchLogisticRegression", 36 | "IPWLearner", 37 | "NNPolicyLearner", 38 | "QLearner", 39 | "ContinuousNNPolicyLearner", 40 | ] 41 | -------------------------------------------------------------------------------- /obp/policy/conf/prior_bts.yaml: -------------------------------------------------------------------------------- 1 | all: 2 | alpha: 3 | - 47.0 4 | - 8.0 5 | - 62.0 6 | - 142.0 7 | - 3.0 8 | - 14.0 9 | - 7.0 10 | - 857.0 11 | - 12.0 12 | - 15.0 13 | - 6.0 14 | - 100.0 15 | - 48.0 16 | - 23.0 17 | - 71.0 18 | - 61.0 19 | - 13.0 20 | - 16.0 21 | - 518.0 22 | - 30.0 23 | - 7.0 24 | - 4.0 25 | - 23.0 26 | - 8.0 27 | - 10.0 28 | - 11.0 29 | - 11.0 30 | - 18.0 31 | - 121.0 32 | - 11.0 33 | - 11.0 34 | - 10.0 35 | - 14.0 36 | - 9.0 37 | - 204.0 38 | - 58.0 39 | - 3.0 40 | - 19.0 41 | - 42.0 42 | - 1013.0 43 | - 2.0 44 | - 328.0 45 | - 15.0 46 | - 31.0 47 | - 14.0 48 | - 138.0 49 | - 45.0 50 | - 55.0 51 | - 23.0 52 | - 38.0 53 | - 10.0 54 | - 401.0 55 | - 52.0 56 | - 6.0 57 | - 3.0 58 | - 6.0 59 | - 5.0 60 | - 32.0 61 | - 35.0 62 | - 133.0 63 | - 52.0 64 | - 820.0 65 | - 43.0 66 | - 195.0 67 | - 8.0 68 | - 42.0 69 | - 40.0 70 | - 4.0 71 | - 32.0 72 | - 30.0 73 | - 9.0 74 | - 22.0 75 | - 6.0 76 | - 23.0 77 | - 5.0 78 | - 54.0 79 | - 8.0 80 | - 22.0 81 | - 65.0 82 | - 246.0 83 | beta: 84 | - 12198.0 85 | - 3566.0 86 | - 15993.0 87 | - 35522.0 88 | - 2367.0 89 | - 4609.0 90 | - 3171.0 91 | - 181745.0 92 | - 4372.0 93 | - 4951.0 94 | - 3100.0 95 | - 24665.0 96 | - 13210.0 97 | - 7061.0 98 | - 18061.0 99 | - 17449.0 100 | - 5644.0 101 | - 6787.0 102 | - 111326.0 103 | - 8776.0 104 | - 3334.0 105 | - 2271.0 106 | - 7389.0 107 | - 2659.0 108 | - 3665.0 109 | - 4724.0 110 | - 3561.0 111 | - 5085.0 112 | - 27407.0 113 | - 4601.0 114 | - 4756.0 115 | - 4120.0 116 | - 4736.0 117 | - 3788.0 118 | - 45292.0 119 | - 14719.0 120 | - 2189.0 121 | - 5589.0 122 | - 11995.0 123 | - 222255.0 124 | - 2308.0 125 | - 70034.0 126 | - 4801.0 127 | - 8274.0 128 | - 5421.0 129 | - 31912.0 130 | - 12213.0 131 | - 13576.0 132 | - 6230.0 133 | - 10382.0 134 | - 4141.0 135 | - 85731.0 136 | - 12811.0 137 | - 2707.0 138 | - 2250.0 139 | - 2668.0 140 | - 2886.0 141 | - 9581.0 142 | - 9465.0 143 | - 28336.0 144 | - 12062.0 145 | - 162793.0 146 | - 12107.0 147 | - 41240.0 148 | - 3162.0 149 | - 11604.0 150 | - 10818.0 151 | - 2923.0 152 | - 8897.0 153 | - 8654.0 154 | - 4000.0 155 | - 6580.0 156 | - 3174.0 157 | - 6766.0 158 | - 2602.0 159 | - 14506.0 160 | - 3968.0 161 | - 7523.0 162 | - 16532.0 163 | - 51964.0 164 | men: 165 | alpha: 166 | - 47.0 167 | - 8.0 168 | - 62.0 169 | - 142.0 170 | - 3.0 171 
| - 6.0 172 | - 100.0 173 | - 48.0 174 | - 23.0 175 | - 71.0 176 | - 61.0 177 | - 13.0 178 | - 16.0 179 | - 518.0 180 | - 30.0 181 | - 7.0 182 | - 4.0 183 | - 23.0 184 | - 8.0 185 | - 10.0 186 | - 11.0 187 | - 11.0 188 | - 18.0 189 | - 121.0 190 | - 11.0 191 | - 4.0 192 | - 32.0 193 | - 30.0 194 | - 9.0 195 | - 22.0 196 | - 6.0 197 | - 23.0 198 | - 5.0 199 | - 54.0 200 | beta: 201 | - 12198.0 202 | - 3566.0 203 | - 15993.0 204 | - 35522.0 205 | - 2367.0 206 | - 3100.0 207 | - 24665.0 208 | - 13210.0 209 | - 7061.0 210 | - 18061.0 211 | - 17449.0 212 | - 5644.0 213 | - 6787.0 214 | - 111326.0 215 | - 8776.0 216 | - 3334.0 217 | - 2271.0 218 | - 7389.0 219 | - 2659.0 220 | - 3665.0 221 | - 4724.0 222 | - 3561.0 223 | - 5085.0 224 | - 27407.0 225 | - 4601.0 226 | - 2923.0 227 | - 8897.0 228 | - 8654.0 229 | - 4000.0 230 | - 6580.0 231 | - 3174.0 232 | - 6766.0 233 | - 2602.0 234 | - 14506.0 235 | women: 236 | alpha: 237 | - 12.0 238 | - 7.0 239 | - 984.0 240 | - 13.0 241 | - 15.0 242 | - 15.0 243 | - 11.0 244 | - 14.0 245 | - 9.0 246 | - 200.0 247 | - 72.0 248 | - 3.0 249 | - 14.0 250 | - 49.0 251 | - 1278.0 252 | - 3.0 253 | - 325.0 254 | - 14.0 255 | - 27.0 256 | - 14.0 257 | - 169.0 258 | - 48.0 259 | - 47.0 260 | - 18.0 261 | - 40.0 262 | - 12.0 263 | - 447.0 264 | - 46.0 265 | - 5.0 266 | - 3.0 267 | - 5.0 268 | - 7.0 269 | - 35.0 270 | - 34.0 271 | - 99.0 272 | - 30.0 273 | - 880.0 274 | - 51.0 275 | - 182.0 276 | - 6.0 277 | - 45.0 278 | - 39.0 279 | - 10.0 280 | - 24.0 281 | - 72.0 282 | - 229.0 283 | beta: 284 | - 3612.0 285 | - 3173.0 286 | - 204484.0 287 | - 4517.0 288 | - 4765.0 289 | - 5331.0 290 | - 4131.0 291 | - 4728.0 292 | - 4028.0 293 | - 44280.0 294 | - 17918.0 295 | - 2309.0 296 | - 4339.0 297 | - 12922.0 298 | - 270771.0 299 | - 2480.0 300 | - 68475.0 301 | - 5129.0 302 | - 7367.0 303 | - 5819.0 304 | - 38026.0 305 | - 13047.0 306 | - 11604.0 307 | - 5394.0 308 | - 10912.0 309 | - 4439.0 310 | - 94485.0 311 | - 10700.0 312 | - 2679.0 313 | - 2319.0 314 | - 2578.0 315 | - 3288.0 316 | - 9566.0 317 | - 9775.0 318 | - 20120.0 319 | - 7317.0 320 | - 172026.0 321 | - 13673.0 322 | - 37329.0 323 | - 3365.0 324 | - 10911.0 325 | - 10734.0 326 | - 4278.0 327 | - 7574.0 328 | - 16826.0 329 | - 47462.0 330 | 331 | -------------------------------------------------------------------------------- /obp/policy/policy_type.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class PolicyType(enum.Enum): 5 | """Policy type. 6 | 7 | Attributes 8 | ---------- 9 | CONTEXT_FREE: 10 | The policy type is contextfree. 11 | CONTEXTUAL: 12 | The policy type is contextual. 13 | OFFLINE: 14 | The policy type is offline. 
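    A minimal sketch of how this enum is consumed in practice (the specific policy
    classes below are illustrative choices taken from the policies and tests
    elsewhere in this repository, not part of this enum's contract):

        from obp.policy import EpsilonGreedy, LogisticEpsilonGreedy
        from obp.policy.policy_type import PolicyType

        assert EpsilonGreedy(n_actions=2).policy_type == PolicyType.CONTEXT_FREE
        assert LogisticEpsilonGreedy(n_actions=2, dim=2).policy_type == PolicyType.CONTEXTUAL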
15 | """ 16 | 17 | CONTEXT_FREE = enum.auto() 18 | CONTEXTUAL = enum.auto() 19 | OFFLINE = enum.auto() 20 | 21 | def __repr__(self) -> str: 22 | 23 | return str(self) 24 | -------------------------------------------------------------------------------- /obp/simulator/__init__.py: -------------------------------------------------------------------------------- 1 | from obp.simulator.simulator import calc_ground_truth_policy_value 2 | 3 | 4 | __all__ = [ 5 | "calc_ground_truth_policy_value", 6 | ] 7 | -------------------------------------------------------------------------------- /obp/simulator/delay_sampler.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import numpy as np 4 | from sklearn.utils import check_random_state 5 | 6 | 7 | @dataclass 8 | class ExponentialDelaySampler: 9 | """Class for sampling delays from different exponential functions. 10 | 11 | Parameters 12 | ----------- 13 | max_scale: float, default=100.0 14 | The maximum scale parameter for the exponential delay distribution. When there is no weighted exponential 15 | function the max_scale becomes the default scale. 16 | 17 | min_scale: float, default=10.0 18 | The minimum scale parameter for the exponential delay distribution. Only used when sampling from a weighted 19 | exponential function. 20 | 21 | random_state: int, default=12345 22 | Controls the random seed in sampling synthetic bandit data. 23 | """ 24 | 25 | max_scale: float = 100.0 26 | min_scale: float = 10.0 27 | random_state: int = None 28 | 29 | def __post_init__(self) -> None: 30 | if self.random_state is None: 31 | raise ValueError("`random_state` must be given") 32 | self.random_ = check_random_state(self.random_state) 33 | 34 | def exponential_delay_function( 35 | self, n_rounds: int, n_actions: int, **kwargs 36 | ) -> np.ndarray: 37 | """Exponential delay function used for sampling a number of delay rounds before rewards can be observed. 38 | 39 | Note 40 | ------ 41 | This implementation of the exponential delay function assumes that there is no causal relationship between the 42 | context, action or reward and observed delay. Exponential delay function have been observed by Ktena, S.I. et al. 43 | 44 | Parameters 45 | ----------- 46 | n_rounds: int 47 | Number of rounds to sample delays for. 48 | 49 | n_actions: int 50 | Number of actions to sample delays for. If the exponential function is not parameterised the delays are 51 | repeated for each actions. 52 | 53 | Returns 54 | --------- 55 | delay_rounds: array-like, shape (n_rounds, ) 56 | Rounded up round delays representing the amount of rounds before the policy can observe the rewards. 57 | 58 | References 59 | ------------ 60 | Ktena, S.I., Tejani, A., Theis, L., Myana, P.K., Dilipkumar, D., Huszár, F., Yoo, S. and Shi, W. 61 | "Addressing delayed feedback for continuous training with neural networks in CTR prediction." 2019. 62 | 63 | """ 64 | delays_per_round = np.ceil( 65 | self.random_.exponential(scale=self.max_scale, size=n_rounds) 66 | ) 67 | 68 | return np.tile(delays_per_round, (n_actions, 1)).T 69 | 70 | def exponential_delay_function_expected_reward_weighted( 71 | self, expected_rewards: np.ndarray, **kwargs 72 | ) -> np.ndarray: 73 | """Exponential delay function used for sampling a number of delay rounds before rewards can be observed. 74 | Each delay is conditioned on the expected reward by multiplying (1 - expected_reward) * scale. 
This encodes 75 | the assumption that the more likely a reward is to be observed, the sooner it tends to 76 | arrive; e.g., recommending an attractive item will likely result in a faster purchase. 77 | 78 | Parameters 79 | ----------- 80 | expected_rewards : array-like, shape (n_rounds, n_actions) 81 | The expected reward, between 0 and 1, for each arm in each round. This is used to weight the scale of the 82 | exponential function. 83 | 84 | Returns 85 | --------- 86 | delay_rounds: array-like, shape (n_rounds, n_actions) 87 | Rounded-up delays representing the number of rounds before the policy can observe the rewards. 88 | """ 89 | scale = self.min_scale + ( 90 | (1 - expected_rewards) * (self.max_scale - self.min_scale) 91 | ) 92 | delays_per_round = np.ceil( 93 | self.random_.exponential(scale=scale, size=expected_rewards.shape) 94 | ) 95 | 96 | return delays_per_round 97 | -------------------------------------------------------------------------------- /obp/simulator/replay.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | 4 | from obp.policy.policy_type import PolicyType 5 | from obp.simulator.simulator import BanditPolicy 6 | from obp.types import BanditFeedback 7 | from obp.utils import check_bandit_feedback_inputs, convert_to_action_dist 8 | 9 | 10 | def run_bandit_replay( 11 | bandit_feedback: BanditFeedback, policy: BanditPolicy 12 | ) -> np.ndarray: 13 | """Run an online bandit algorithm on given logged bandit feedback data using the replay method. 14 | 15 | Parameters 16 | ---------- 17 | bandit_feedback: BanditFeedback 18 | Logged bandit data used in offline bandit simulation. 19 | policy: BanditPolicy 20 | Online bandit policy to be evaluated in offline bandit simulation (i.e., evaluation policy). 21 | Returns 22 | -------- 23 | action_dist: array-like, shape (n_rounds, n_actions, len_list) 24 | Action choice probabilities (can be deterministic). 25 | 26 | References 27 | ------------ 28 | Lihong Li, Wei Chu, John Langford, and Xuanhui Wang. 29 | "Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms.", 2011.
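    A minimal usage sketch (the dataset class, reward/behavior-policy helpers, and
    parameter values below are illustrative choices, not part of this function's
    contract):

        from obp.dataset import SyntheticBanditDataset, logistic_reward_function, linear_behavior_policy
        from obp.policy import EpsilonGreedy
        from obp.simulator.replay import run_bandit_replay

        dataset = SyntheticBanditDataset(
            n_actions=10,
            dim_context=5,
            reward_function=logistic_reward_function,
            behavior_policy_function=linear_behavior_policy,
            random_state=12345,
        )
        bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)
        policy = EpsilonGreedy(n_actions=10, epsilon=0.1)
        # action_dist has shape (n_rounds, n_actions, len_list)
        action_dist = run_bandit_replay(bandit_feedback=bandit_feedback, policy=policy)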
30 | """ 31 | for key_ in ["action", "position", "reward", "pscore", "context"]: 32 | if key_ not in bandit_feedback: 33 | raise RuntimeError(f"Missing key of {key_} in 'bandit_feedback'.") 34 | check_bandit_feedback_inputs( 35 | context=bandit_feedback["context"], 36 | action=bandit_feedback["action"], 37 | reward=bandit_feedback["reward"], 38 | position=bandit_feedback["position"], 39 | pscore=bandit_feedback["pscore"], 40 | ) 41 | 42 | policy_ = policy 43 | selected_actions_list = list() 44 | dim_context = bandit_feedback["context"].shape[1] 45 | if bandit_feedback["position"] is None: 46 | bandit_feedback["position"] = np.zeros_like( 47 | bandit_feedback["action"], dtype=int 48 | ) 49 | for action_, reward_, position_, context_ in tqdm( 50 | zip( 51 | bandit_feedback["action"], 52 | bandit_feedback["reward"], 53 | bandit_feedback["position"], 54 | bandit_feedback["context"], 55 | ), 56 | total=bandit_feedback["n_rounds"], 57 | ): 58 | 59 | # select a list of actions 60 | if policy_.policy_type == PolicyType.CONTEXT_FREE: 61 | selected_actions = policy_.select_action() 62 | elif policy_.policy_type == PolicyType.CONTEXTUAL: 63 | selected_actions = policy_.select_action(context_.reshape(1, dim_context)) 64 | action_match_ = action_ == selected_actions[position_] 65 | # update parameters of a bandit policy 66 | # only when selected actions&positions are equal to logged actions&positions 67 | if action_match_: 68 | if policy_.policy_type == PolicyType.CONTEXT_FREE: 69 | policy_.update_params(action=action_, reward=reward_) 70 | elif policy_.policy_type == PolicyType.CONTEXTUAL: 71 | policy_.update_params( 72 | action=action_, 73 | reward=reward_, 74 | context=context_.reshape(1, dim_context), 75 | ) 76 | selected_actions_list.append(selected_actions) 77 | 78 | action_dist = convert_to_action_dist( 79 | n_actions=bandit_feedback["action"].max() + 1, 80 | selected_actions=np.array(selected_actions_list), 81 | ) 82 | return action_dist 83 | -------------------------------------------------------------------------------- /obp/types.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Yuta Saito, Yusuke Narita, and ZOZO Technologies, Inc. All rights reserved. 2 | # Licensed under the Apache 2.0 License. 
3 | 4 | """Types.""" 5 | from typing import Dict 6 | from typing import Union 7 | 8 | import numpy as np 9 | 10 | 11 | # dataset 12 | BanditFeedback = Dict[str, Union[int, np.ndarray]] 13 | -------------------------------------------------------------------------------- /obp/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.5.5" 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "obp" 3 | version = "0.5.5" 4 | description = "Open Bandit Pipeline: a python library for off-policy evaluation and learning" 5 | authors = ["Yuta Saito "] 6 | license = "Apache License 2.0" 7 | 8 | [tool.poetry.dependencies] 9 | python = ">=3.7.1,<3.10" 10 | torch = "^1.9.0" 11 | scikit-learn = "1.0.2" 12 | pandas = "^1.3.2" 13 | numpy = "^1.21.2" 14 | matplotlib = "^3.4.3" 15 | tqdm = "^4.62.2" 16 | scipy = "1.7.3" 17 | PyYAML = "^5.4.1" 18 | seaborn = "^0.11.2" 19 | pyieoe = "^0.1.1" 20 | pingouin = "^0.4.0" 21 | mypy-extensions = "^0.4.3" 22 | Pillow = "9.1.1" 23 | 24 | [tool.poetry.dev-dependencies] 25 | flake8 = "^3.9.2" 26 | black = "22.1.0" 27 | pytest = "^6.2.5" 28 | isort = "^5.9.3" 29 | 30 | [build-system] 31 | requires = ["poetry-core>=1.0.0"] 32 | build-backend = "poetry.core.masonry.api" 33 | 34 | [tool.isort] 35 | profile = 'black' 36 | src_paths = ['obp', 'tests', 'examples', 'benchmark'] 37 | line_length = 88 38 | lines_after_imports = 2 39 | force_single_line = 'True' 40 | force_sort_within_sections = 'True' 41 | order_by_type = 'False' 42 | 43 | [tool.pytest.ini_options] 44 | addopts = "--color=yes" 45 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | E501,W503,W605,E203 4 | # We ignore E501: line too long because we assume 5 | # the checking of code length is already done by black. 
6 | # We ignore W503: line break before binary operator because it is incompatible with black 7 | # We ignore W605: invalid escape sequence because it is needed to write math equations 8 | # We ignore E203: whitespace before ':' 9 | exclude = .venv,build 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from obp.version import __version__ 2 | from setuptools import setup, find_packages 3 | from os import path 4 | import sys 5 | 6 | here = path.abspath(path.dirname(__file__)) 7 | sys.path.insert(0, path.join(here, "obp")) 8 | 9 | print("version") 10 | print(__version__) 11 | 12 | with open(path.join(here, "README.md"), encoding="utf-8") as f: 13 | long_description = f.read() 14 | 15 | package_data_list = ["obp/policy/conf/prior_bts.yaml", "obp/dataset/obd"] 16 | 17 | setup( 18 | name="obp", 19 | version=__version__, 20 | description="Open Bandit Pipeline: a python library for bandit algorithms and off-policy evaluation", 21 | url="https://github.com/st-tech/zr-obp", 22 | author="Yuta Saito", 23 | author_email="open-bandit-project@googlegroups.com", 24 | keywords=["bandit algorithms", "off-policy evaluation"], 25 | long_description=long_description, 26 | long_description_content_type="text/markdown", 27 | install_requires=[ 28 | "matplotlib>=3.4.3", 29 | "mypy-extensions>=0.4.3", 30 | "numpy>=1.21.2", 31 | "pandas>=1.3.2", 32 | "pyyaml>=5.1", 33 | "seaborn>=0.10.1", 34 | "scikit-learn>=1.0.2", 35 | "scipy>=1.7.3", 36 | "torch>=1.9.0", 37 | "tqdm>=4.62.2", 38 | "pyieoe>=0.1.1", 39 | "pingouin>=0.4.0", 40 | ], 41 | license="Apache License", 42 | packages=find_packages( 43 | exclude=["benchmark", "docs", "examples", "obd", "tests", "slides"] 44 | ), 45 | package_data={"obp": package_data_list}, 46 | include_package_data=True, 47 | classifiers=[ 48 | "Intended Audience :: Science/Research", 49 | "Programming Language :: Python :: 3.7", 50 | "Programming Language :: Python :: 3.8", 51 | "Programming Language :: Python :: 3.9", 52 | "Topic :: Scientific/Engineering", 53 | "Topic :: Scientific/Engineering :: Mathematics", 54 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 55 | "Topic :: Software Development", 56 | "Topic :: Software Development :: Libraries", 57 | "Topic :: Software Development :: Libraries :: Python Modules", 58 | "License :: OSI Approved :: Apache Software License", 59 | ], 60 | ) 61 | -------------------------------------------------------------------------------- /slides/slides_EN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/slides/slides_EN.pdf -------------------------------------------------------------------------------- /slides/slides_JN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/st-tech/zr-obp/8cbd5fa4558b7ad2ba4781546d6604e4cc3e07c4/slides/slides_JN.pdf -------------------------------------------------------------------------------- /tests/dataset/test_multiclass.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import pytest 5 | from sklearn.datasets import load_digits 6 | from sklearn.linear_model import LogisticRegression 7 | 8 | from obp.dataset import MultiClassToBanditReduction 9 | 10 | 11 | 
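# A minimal sketch of the end-to-end workflow these tests exercise (parameter values
# are illustrative; the individual tests below pin down the exact behavior):
#
#     X, y = load_digits(return_X_y=True)
#     mcbr = MultiClassToBanditReduction(
#         X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=0.3
#     )
#     mcbr.split_train_eval(eval_size=1000)
#     bandit_feedback = mcbr.obtain_batch_bandit_feedback()
#     action_dist = mcbr.obtain_action_dist_by_eval_policy()
#     ground_truth = mcbr.calc_ground_truth_policy_value(action_dist=action_dist)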
@pytest.fixture(scope="session") 12 | def raw_data() -> Tuple[np.ndarray, np.ndarray]: 13 | X, y = load_digits(return_X_y=True) 14 | return X, y 15 | 16 | 17 | def test_invalid_initialization(raw_data): 18 | X, y = raw_data 19 | 20 | # invalid alpha_b 21 | with pytest.raises(ValueError): 22 | MultiClassToBanditReduction( 23 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=-0.3 24 | ) 25 | 26 | with pytest.raises(ValueError): 27 | MultiClassToBanditReduction( 28 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=1.3 29 | ) 30 | 31 | # invalid classifier 32 | with pytest.raises(ValueError): 33 | from sklearn.tree import DecisionTreeRegressor 34 | 35 | MultiClassToBanditReduction(X=X, y=y, base_classifier_b=DecisionTreeRegressor) 36 | 37 | # invalid n_def_actions 38 | with pytest.raises(TypeError): 39 | MultiClassToBanditReduction( 40 | X=X, 41 | y=y, 42 | base_classifier_b=LogisticRegression(max_iter=10000), 43 | n_deficient_actions="aaa", 44 | ) 45 | 46 | with pytest.raises(TypeError): 47 | MultiClassToBanditReduction( 48 | X=X, 49 | y=y, 50 | base_classifier_b=LogisticRegression(max_iter=10000), 51 | n_deficient_actions=None, 52 | ) 53 | 54 | with pytest.raises(ValueError): 55 | MultiClassToBanditReduction( 56 | X=X, 57 | y=y, 58 | base_classifier_b=LogisticRegression(max_iter=10000), 59 | n_deficient_actions=-1, 60 | ) 61 | 62 | with pytest.raises(ValueError): 63 | MultiClassToBanditReduction( 64 | X=X, 65 | y=y, 66 | base_classifier_b=LogisticRegression(max_iter=10000), 67 | n_deficient_actions=1000, 68 | ) 69 | 70 | 71 | def test_split_train_eval(raw_data): 72 | X, y = raw_data 73 | 74 | eval_size = 1000 75 | mcbr = MultiClassToBanditReduction( 76 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=0.3 77 | ) 78 | mcbr.split_train_eval(eval_size=eval_size) 79 | 80 | assert eval_size == mcbr.n_rounds_ev 81 | 82 | 83 | def test_obtain_batch_bandit_feedback(raw_data): 84 | X, y = raw_data 85 | 86 | for n_deficient_actions in [0, 2]: 87 | mcbr = MultiClassToBanditReduction( 88 | X=X, 89 | y=y, 90 | base_classifier_b=LogisticRegression(max_iter=10000), 91 | alpha_b=0.3, 92 | n_deficient_actions=n_deficient_actions, 93 | ) 94 | mcbr.split_train_eval() 95 | bandit_feedback = mcbr.obtain_batch_bandit_feedback() 96 | 97 | assert "n_actions" in bandit_feedback.keys() 98 | assert "n_rounds" in bandit_feedback.keys() 99 | assert "context" in bandit_feedback.keys() 100 | assert "action" in bandit_feedback.keys() 101 | assert "reward" in bandit_feedback.keys() 102 | assert "position" in bandit_feedback.keys() 103 | assert "pi_b" in bandit_feedback.keys() 104 | assert "pscore" in bandit_feedback.keys() 105 | 106 | n_rounds = bandit_feedback["n_rounds"] 107 | pi_b = bandit_feedback["pi_b"] 108 | assert pi_b.shape[0] == n_rounds 109 | n_actions = np.unique(y).shape[0] 110 | assert pi_b.shape[1] == n_actions 111 | assert pi_b.shape[2] == 1 112 | assert np.allclose(pi_b[:, :, 0].sum(1), np.ones(n_rounds)) 113 | assert (pi_b == 0).sum() == n_deficient_actions * n_rounds 114 | 115 | 116 | def test_obtain_action_dist_by_eval_policy(raw_data): 117 | X, y = raw_data 118 | 119 | eval_size = 1000 120 | mcbr = MultiClassToBanditReduction( 121 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=0.3 122 | ) 123 | mcbr.split_train_eval(eval_size=eval_size) 124 | 125 | # invalid alpha_e 126 | with pytest.raises(ValueError): 127 | mcbr.obtain_action_dist_by_eval_policy(alpha_e=-0.3) 128 | 129 | with pytest.raises(ValueError): 
130 | mcbr.obtain_action_dist_by_eval_policy(alpha_e=1.3) 131 | 132 | # valid type 133 | action_dist = mcbr.obtain_action_dist_by_eval_policy() 134 | 135 | assert action_dist.shape[0] == eval_size 136 | n_actions = np.unique(y).shape[0] 137 | assert action_dist.shape[1] == n_actions 138 | assert action_dist.shape[2] == 1 139 | assert np.allclose(action_dist[:, :, 0].sum(1), np.ones(eval_size)) 140 | 141 | 142 | def test_calc_ground_truth_policy_value(raw_data): 143 | X, y = raw_data 144 | 145 | eval_size = 1000 146 | mcbr = MultiClassToBanditReduction( 147 | X=X, y=y, base_classifier_b=LogisticRegression(max_iter=10000), alpha_b=0.3 148 | ) 149 | mcbr.split_train_eval(eval_size=eval_size) 150 | 151 | with pytest.raises(ValueError): 152 | invalid_action_dist = np.zeros(eval_size) 153 | mcbr.calc_ground_truth_policy_value(action_dist=invalid_action_dist) 154 | 155 | with pytest.raises(ValueError): 156 | reshaped_action_dist = mcbr.obtain_action_dist_by_eval_policy().reshape( 157 | 1, -1, 1 158 | ) 159 | mcbr.calc_ground_truth_policy_value(action_dist=reshaped_action_dist) 160 | 161 | action_dist = mcbr.obtain_action_dist_by_eval_policy() 162 | ground_truth_policy_value = mcbr.calc_ground_truth_policy_value( 163 | action_dist=action_dist 164 | ) 165 | assert isinstance(ground_truth_policy_value, float) 166 | -------------------------------------------------------------------------------- /tests/dataset/test_real.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import pytest 7 | 8 | from obp.dataset import OpenBanditDataset 9 | 10 | 11 | def test_real_init(): 12 | # behavior_policy 13 | with pytest.raises(ValueError): 14 | OpenBanditDataset(behavior_policy="aaa", campaign="all") 15 | 16 | # campaign 17 | with pytest.raises(ValueError): 18 | OpenBanditDataset(behavior_policy="random", campaign="aaa") 19 | 20 | # data_path 21 | with pytest.raises(ValueError): 22 | OpenBanditDataset(behavior_policy="random", campaign="all", data_path=5) 23 | 24 | # load_raw_data 25 | obd = OpenBanditDataset(behavior_policy="random", campaign="all") 26 | # check the value exists and has the right type 27 | assert ( 28 | isinstance(obd.data, pd.DataFrame) 29 | and isinstance(obd.item_context, pd.DataFrame) 30 | and isinstance(obd.action, np.ndarray) 31 | and isinstance(obd.position, np.ndarray) 32 | and isinstance(obd.reward, np.ndarray) 33 | and isinstance(obd.pscore, np.ndarray) 34 | ) 35 | 36 | # pre_process (context and action_context) 37 | assert isinstance(obd.context, np.ndarray) and isinstance( 38 | obd.action_context, np.ndarray 39 | ) 40 | 41 | 42 | def test_obtain_batch_bandit_feedback(): 43 | # invalid test_size 44 | with pytest.raises(ValueError): 45 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 46 | dataset.obtain_batch_bandit_feedback(is_timeseries_split=True, test_size=1.3) 47 | 48 | with pytest.raises(ValueError): 49 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 50 | dataset.obtain_batch_bandit_feedback(is_timeseries_split=True, test_size=-0.5) 51 | 52 | with pytest.raises(TypeError): 53 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 54 | dataset.obtain_batch_bandit_feedback(is_timeseries_split=True, test_size="0.5") 55 | 56 | with pytest.raises(TypeError): 57 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 58 | 
dataset.obtain_batch_bandit_feedback(is_timeseries_split="True", test_size=0.5) 59 | 60 | # existence of keys 61 | # is_timeseries_split=False (default) 62 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 63 | bandit_feedback = dataset.obtain_batch_bandit_feedback() 64 | 65 | assert "n_rounds" in bandit_feedback.keys() 66 | assert "n_actions" in bandit_feedback.keys() 67 | assert "action" in bandit_feedback.keys() 68 | assert "position" in bandit_feedback.keys() 69 | assert "reward" in bandit_feedback.keys() 70 | assert "pscore" in bandit_feedback.keys() 71 | assert "context" in bandit_feedback.keys() 72 | assert "action_context" in bandit_feedback.keys() 73 | 74 | # is_timeseries_split=True 75 | bandit_feedback_timeseries = dataset.obtain_batch_bandit_feedback( 76 | is_timeseries_split=True 77 | ) 78 | assert isinstance(bandit_feedback_timeseries, Tuple) 79 | bandit_feedback_train = bandit_feedback_timeseries[0] 80 | bandit_feedback_test = bandit_feedback_timeseries[1] 81 | 82 | bf_elems = { 83 | "n_rounds", 84 | "n_actions", 85 | "action", 86 | "position", 87 | "reward", 88 | "pscore", 89 | "context", 90 | "action_context", 91 | } 92 | assert all(k in bandit_feedback_train.keys() for k in bf_elems) 93 | assert all(k in bandit_feedback_test.keys() for k in bf_elems) 94 | 95 | 96 | def test_calc_on_policy_policy_value_estimate(): 97 | ground_truth_policy_value = OpenBanditDataset.calc_on_policy_policy_value_estimate( 98 | behavior_policy="random", campaign="all" 99 | ) 100 | assert isinstance(ground_truth_policy_value, float) 101 | 102 | 103 | def test_sample_bootstrap_bandit_feedback(): 104 | with pytest.raises(ValueError): 105 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 106 | dataset.sample_bootstrap_bandit_feedback( 107 | is_timeseries_split=True, test_size=1.3 108 | ) 109 | 110 | with pytest.raises(ValueError): 111 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 112 | dataset.sample_bootstrap_bandit_feedback( 113 | is_timeseries_split=True, test_size=-0.5 114 | ) 115 | 116 | with pytest.raises(ValueError): 117 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 118 | dataset.sample_bootstrap_bandit_feedback(sample_size=-50) 119 | 120 | with pytest.raises(TypeError): 121 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 122 | dataset.sample_bootstrap_bandit_feedback(sample_size=50.0) 123 | 124 | with pytest.raises(ValueError): 125 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 126 | dataset.sample_bootstrap_bandit_feedback(sample_size=10000000) 127 | 128 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 129 | bandit_feedback = dataset.obtain_batch_bandit_feedback() 130 | bootstrap_bf = dataset.sample_bootstrap_bandit_feedback() 131 | 132 | bf_keys = {"action", "position", "reward", "pscore", "context"} 133 | for k in bf_keys: 134 | assert len(bandit_feedback[k]) == len(bootstrap_bf[k]) 135 | 136 | bandit_feedback_timeseries: Dict = dataset.obtain_batch_bandit_feedback( 137 | is_timeseries_split=True 138 | )[0] 139 | bootstrap_bf_timeseries = dataset.sample_bootstrap_bandit_feedback( 140 | is_timeseries_split=True 141 | ) 142 | for k in bf_keys: 143 | assert len(bandit_feedback_timeseries[k]) == len(bootstrap_bf_timeseries[k]) 144 | 145 | sample_size = 1000 146 | dataset = OpenBanditDataset(behavior_policy="random", campaign="all") 147 | bootstrap_bf = dataset.sample_bootstrap_bandit_feedback(sample_size=sample_size) 148 | 
assert bootstrap_bf["n_rounds"] == sample_size 149 | for k in bf_keys: 150 | assert len(bootstrap_bf[k]) == sample_size 151 | -------------------------------------------------------------------------------- /tests/ope/conftest.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | from typing import Set 4 | 5 | import numpy as np 6 | import pytest 7 | from scipy import special 8 | from sklearn.utils import check_random_state 9 | 10 | from obp.dataset import linear_behavior_policy 11 | from obp.dataset import logistic_reward_function 12 | from obp.dataset import SyntheticBanditDataset 13 | from obp.dataset import SyntheticBanditDatasetWithActionEmbeds 14 | from obp.dataset import SyntheticContinuousBanditDataset 15 | from obp.dataset import SyntheticMultiLoggersBanditDataset 16 | from obp.dataset import SyntheticSlateBanditDataset 17 | from obp.policy import Random 18 | from obp.types import BanditFeedback 19 | from obp.utils import sigmoid 20 | 21 | 22 | # resolve ImportMismatchError when using virtual environment 23 | os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1" 24 | 25 | 26 | # generate synthetic bandit dataset using SyntheticBanditDataset 27 | @pytest.fixture(scope="session") 28 | def synthetic_bandit_feedback() -> BanditFeedback: 29 | n_actions = 10 30 | dim_context = 5 31 | random_state = 12345 32 | n_rounds = 10000 33 | dataset = SyntheticBanditDataset( 34 | n_actions=n_actions, 35 | dim_context=dim_context, 36 | reward_function=logistic_reward_function, 37 | behavior_policy_function=linear_behavior_policy, 38 | random_state=random_state, 39 | ) 40 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 41 | return bandit_feedback 42 | 43 | 44 | # generate synthetic slate bandit dataset using SyntheticSlateBanditDataset 45 | @pytest.fixture(scope="session") 46 | def synthetic_slate_bandit_feedback() -> BanditFeedback: 47 | # set parameters 48 | n_unique_action = 10 49 | len_list = 3 50 | dim_context = 2 51 | reward_type = "binary" 52 | random_state = 12345 53 | n_rounds = 100 54 | dataset = SyntheticSlateBanditDataset( 55 | n_unique_action=n_unique_action, 56 | len_list=len_list, 57 | dim_context=dim_context, 58 | reward_type=reward_type, 59 | random_state=random_state, 60 | ) 61 | # obtain feedback 62 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 63 | return bandit_feedback 64 | 65 | 66 | # generate synthetic continuous bandit dataset using SyntheticContinuousBanditDataset 67 | @pytest.fixture(scope="session") 68 | def synthetic_continuous_bandit_feedback() -> BanditFeedback: 69 | # set parameters 70 | dim_context = 2 71 | random_state = 12345 72 | n_rounds = 100 73 | min_action_value = -10 74 | max_action_value = 10 75 | dataset = SyntheticContinuousBanditDataset( 76 | dim_context=dim_context, 77 | min_action_value=min_action_value, 78 | max_action_value=max_action_value, 79 | random_state=random_state, 80 | ) 81 | # obtain feedback 82 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 83 | return bandit_feedback 84 | 85 | 86 | @pytest.fixture(scope="session") 87 | def synthetic_multi_bandit_feedback() -> BanditFeedback: 88 | n_actions = 10 89 | dim_context = 5 90 | betas = [-10, -5, 0, 5, 10] 91 | rhos = [1, 2, 3, 2, 1] 92 | random_state = 12345 93 | n_rounds = 10000 94 | dataset = SyntheticMultiLoggersBanditDataset( 95 | n_actions=n_actions, 96 | dim_context=dim_context, 97 | betas=betas, 98 | rhos=rhos, 99 | 
reward_function=logistic_reward_function, 100 | random_state=random_state, 101 | ) 102 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 103 | return bandit_feedback 104 | 105 | 106 | @pytest.fixture(scope="session") 107 | def synthetic_bandit_feedback_with_embed() -> BanditFeedback: 108 | n_actions = 10 109 | dim_context = 5 110 | n_cat_dim = 3 111 | n_cat_per_dim = 5 112 | random_state = 12345 113 | n_rounds = 10000 114 | dataset = SyntheticBanditDatasetWithActionEmbeds( 115 | n_actions=n_actions, 116 | dim_context=dim_context, 117 | n_cat_dim=n_cat_dim, 118 | n_cat_per_dim=n_cat_per_dim, 119 | reward_function=logistic_reward_function, 120 | random_state=random_state, 121 | ) 122 | bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds) 123 | return bandit_feedback 124 | 125 | 126 | # make the expected reward of synthetic bandit feedback close to that of the Open Bandit Dataset 127 | @pytest.fixture(scope="session") 128 | def fixed_synthetic_bandit_feedback(synthetic_bandit_feedback) -> BanditFeedback: 129 | # set random 130 | random_state = 12345 131 | random_ = check_random_state(random_state) 132 | # copy synthetic bandit feedback 133 | bandit_feedback = copy.deepcopy(synthetic_bandit_feedback) 134 | # expected reward would be about 0.65%, which is close to that of the ZOZO dataset 135 | logit = special.logit(bandit_feedback["expected_reward"]) 136 | bandit_feedback["expected_reward"] = sigmoid(logit - 4.0) 137 | expected_reward_factual = bandit_feedback["expected_reward"][ 138 | np.arange(bandit_feedback["n_rounds"]), bandit_feedback["action"] 139 | ] 140 | bandit_feedback["reward"] = random_.binomial(n=1, p=expected_reward_factual) 141 | return bandit_feedback 142 | 143 | 144 | # key set of bandit feedback data 145 | @pytest.fixture(scope="session") 146 | def feedback_key_set() -> Set[str]: 147 | return { 148 | "action", 149 | "action_context", 150 | "context", 151 | "expected_reward", 152 | "n_actions", 153 | "n_rounds", 154 | "position", 155 | "pi_b", 156 | "pscore", 157 | "reward", 158 | } 159 | 160 | 161 | # random evaluation policy 162 | @pytest.fixture(scope="session") 163 | def random_action_dist(synthetic_bandit_feedback) -> np.ndarray: 164 | n_actions = synthetic_bandit_feedback["n_actions"] 165 | evaluation_policy = Random(n_actions=n_actions, len_list=1) 166 | action_dist = evaluation_policy.compute_batch_action_dist( 167 | n_rounds=synthetic_bandit_feedback["n_rounds"] 168 | ) 169 | return action_dist 170 | 171 | 172 | def generate_action_dist(i, j, k): 173 | x = np.random.uniform(size=(i, j, k)) 174 | action_dist = x / x.sum(axis=1)[:, np.newaxis, :] 175 | return action_dist 176 | -------------------------------------------------------------------------------- /tests/ope/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 100 3 | learning_rate: 0.01 4 | max_depth: 5 5 | min_samples_leaf: 10 6 | random_state: 12345 7 | logistic_regression: 8 | max_iter: 10000 9 | C: 1000 10 | random_state: 12345 11 | random_forest: 12 | n_estimators: 100 13 | max_depth: 5 14 | min_samples_leaf: 10 15 | random_state: 12345 16 | ridge: 17 | alpha: 0.2 18 | random_state: 12345 19 | -------------------------------------------------------------------------------- /tests/ope/hyperparams_slate.yaml: -------------------------------------------------------------------------------- 1 | lightgbm: 2 | n_estimators: 100 3 | learning_rate: 0.01 4 | max_depth: 5 5 | 
min_samples_leaf: 10 6 | random_state: 12345 7 | random_forest: 8 | n_estimators: 100 9 | max_depth: 5 10 | min_samples_leaf: 10 11 | random_state: 12345 12 | ridge: 13 | alpha: 0.2 14 | random_state: 12345 15 | -------------------------------------------------------------------------------- /tests/ope/test_dm_estimators.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from conftest import generate_action_dist 4 | import numpy as np 5 | import pytest 6 | 7 | from obp.ope import DirectMethod 8 | from obp.types import BanditFeedback 9 | 10 | 11 | # action_dist, position, estimated_rewards_by_reg_model, description 12 | invalid_input_of_dm = [ 13 | ( 14 | generate_action_dist(5, 4, 3), 15 | np.zeros(5, dtype=int), 16 | np.zeros((5, 4, 2)), # 17 | "Expected `estimated_rewards_by_reg_model.shape == action_dist.shape`, but found it False", 18 | ), 19 | ( 20 | generate_action_dist(5, 4, 3), 21 | np.zeros(5, dtype=int), 22 | None, # 23 | "`estimated_rewards_by_reg_model` must be 3D array", 24 | ), 25 | ( 26 | generate_action_dist(5, 4, 3), 27 | np.zeros(5, dtype=int), 28 | "4", # 29 | "`estimated_rewards_by_reg_model` must be 3D array", 30 | ), 31 | ] 32 | 33 | 34 | @pytest.mark.parametrize( 35 | "action_dist, position, estimated_rewards_by_reg_model, description", 36 | invalid_input_of_dm, 37 | ) 38 | def test_dm_using_invalid_input_data( 39 | action_dist: np.ndarray, 40 | position: np.ndarray, 41 | estimated_rewards_by_reg_model: np.ndarray, 42 | description: str, 43 | ) -> None: 44 | dm = DirectMethod() 45 | with pytest.raises(ValueError, match=f"{description}*"): 46 | _ = dm.estimate_policy_value( 47 | action_dist=action_dist, 48 | position=position, 49 | estimated_rewards_by_reg_model=estimated_rewards_by_reg_model, 50 | ) 51 | with pytest.raises(ValueError, match=f"{description}*"): 52 | _ = dm.estimate_interval( 53 | action_dist=action_dist, 54 | position=position, 55 | estimated_rewards_by_reg_model=estimated_rewards_by_reg_model, 56 | ) 57 | 58 | 59 | def test_dm_using_random_evaluation_policy( 60 | synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray 61 | ) -> None: 62 | """ 63 | Test the performance of the direct method using synthetic bandit data and random evaluation policy 64 | """ 65 | expected_reward = synthetic_bandit_feedback["expected_reward"][:, :, np.newaxis] 66 | action_dist = random_action_dist 67 | # compute ground truth policy value using expected reward 68 | q_pi_e = np.average(expected_reward[:, :, 0], weights=action_dist[:, :, 0], axis=1) 69 | # compute statistics of ground truth policy value 70 | gt_mean = q_pi_e.mean() 71 | # prepare dm 72 | dm = DirectMethod() 73 | # prepare input dict 74 | input_dict = { 75 | k: v 76 | for k, v in synthetic_bandit_feedback.items() 77 | if k in ["reward", "action", "pscore", "position"] 78 | } 79 | input_dict["action_dist"] = action_dist 80 | # estimated_rewards_by_reg_model is required 81 | with pytest.raises( 82 | TypeError, 83 | match=re.escape( 84 | "estimate_policy_value() missing 1 required positional argument: 'estimated_rewards_by_reg_model'" 85 | ), 86 | ): 87 | _ = dm.estimate_policy_value(**input_dict) 88 | # add estimated_rewards_by_reg_model 89 | input_dict["estimated_rewards_by_reg_model"] = expected_reward 90 | # check expectation 91 | estimated_policy_value = dm.estimate_policy_value(**input_dict) 92 | assert ( 93 | gt_mean == estimated_policy_value 94 | ), "DM should be perfect when the regression model is perfect" 95 | # remove 
unnecessary keys 96 | del input_dict["reward"] 97 | del input_dict["pscore"] 98 | del input_dict["action"] 99 | estimated_policy_value = dm.estimate_policy_value(**input_dict) 100 | assert ( 101 | gt_mean == estimated_policy_value 102 | ), "DM should be perfect when the regression model is perfect" 103 | -------------------------------------------------------------------------------- /tests/ope/test_kernel_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import integrate 3 | 4 | from obp.ope import cosine_kernel 5 | from obp.ope import epanechnikov_kernel 6 | from obp.ope import gaussian_kernel 7 | from obp.ope import triangular_kernel 8 | 9 | 10 | def test_kernel_functions(): 11 | # triangular 12 | assert np.isclose( 13 | integrate.quad(lambda x: triangular_kernel(x), -np.inf, np.inf)[0], 1 14 | ) 15 | assert np.isclose( 16 | integrate.quad(lambda x: x * triangular_kernel(x), -np.inf, np.inf)[0], 0 17 | ) 18 | assert integrate.quad(lambda x: triangular_kernel(x) ** 2, -np.inf, np.inf)[0] > 0 19 | 20 | # epanechnikov 21 | assert np.isclose( 22 | integrate.quad(lambda x: epanechnikov_kernel(x), -np.inf, np.inf)[0], 1 23 | ) 24 | assert np.isclose( 25 | integrate.quad(lambda x: x * epanechnikov_kernel(x), -np.inf, np.inf)[0], 0 26 | ) 27 | assert integrate.quad(lambda x: epanechnikov_kernel(x) ** 2, -np.inf, np.inf)[0] > 0 28 | 29 | # gaussian 30 | assert np.isclose( 31 | integrate.quad(lambda x: gaussian_kernel(x), -np.inf, np.inf)[0], 1 32 | ) 33 | assert np.isclose( 34 | integrate.quad(lambda x: x * gaussian_kernel(x), -np.inf, np.inf)[0], 0 35 | ) 36 | assert integrate.quad(lambda x: gaussian_kernel(x) ** 2, -np.inf, np.inf)[0] > 0 37 | 38 | # cosine 39 | assert np.isclose(integrate.quad(lambda x: cosine_kernel(x), -np.inf, np.inf)[0], 1) 40 | assert np.isclose( 41 | integrate.quad(lambda x: x * cosine_kernel(x), -np.inf, np.inf)[0], 0 42 | ) 43 | assert integrate.quad(lambda x: cosine_kernel(x) ** 2, -np.inf, np.inf)[0] > 0 44 | -------------------------------------------------------------------------------- /tests/policy/test_contextfree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from obp.policy.contextfree import BernoulliTS 5 | from obp.policy.contextfree import EpsilonGreedy 6 | from obp.policy.contextfree import Random 7 | from obp.policy.policy_type import PolicyType 8 | 9 | 10 | def test_contextfree_base_exception(): 11 | # invalid n_actions 12 | with pytest.raises(ValueError): 13 | EpsilonGreedy(n_actions=0) 14 | 15 | with pytest.raises(TypeError): 16 | EpsilonGreedy(n_actions="3") 17 | 18 | # invalid len_list 19 | with pytest.raises(ValueError): 20 | EpsilonGreedy(n_actions=2, len_list=-1) 21 | 22 | with pytest.raises(TypeError): 23 | EpsilonGreedy(n_actions=2, len_list="5") 24 | 25 | # invalid batch_size 26 | with pytest.raises(ValueError): 27 | EpsilonGreedy(n_actions=2, batch_size=-3) 28 | 29 | with pytest.raises(TypeError): 30 | EpsilonGreedy(n_actions=2, batch_size="3") 31 | 32 | # invalid relationship between n_actions and len_list 33 | with pytest.raises(ValueError): 34 | EpsilonGreedy(n_actions=5, len_list=10) 35 | 36 | with pytest.raises(ValueError): 37 | EpsilonGreedy(n_actions=2, len_list=3) 38 | 39 | 40 | def test_egreedy_normal_epsilon(): 41 | 42 | policy1 = EpsilonGreedy(n_actions=2) 43 | assert 0 <= policy1.epsilon <= 1 44 | 45 | policy2 = EpsilonGreedy(n_actions=3, epsilon=0.3) 46 | assert 
0 <= policy2.epsilon <= 1 47 | 48 | # policy type 49 | assert EpsilonGreedy(n_actions=2).policy_type == PolicyType.CONTEXT_FREE 50 | 51 | 52 | def test_egreedy_abnormal_epsilon(): 53 | 54 | with pytest.raises(ValueError): 55 | EpsilonGreedy(n_actions=2, epsilon=1.2) 56 | 57 | with pytest.raises(ValueError): 58 | EpsilonGreedy(n_actions=5, epsilon=-0.2) 59 | 60 | 61 | def test_egreedy_select_action_exploitation(): 62 | trial_num = 50 63 | policy = EpsilonGreedy(n_actions=2, epsilon=0.0) 64 | policy.action_counts = np.array([3, 3]) 65 | policy.reward_counts = np.array([3, 0]) 66 | for _ in range(trial_num): 67 | assert policy.select_action()[0] == 0 68 | 69 | 70 | def test_egreedy_select_action_exploration(): 71 | trial_num = 50 72 | policy = EpsilonGreedy(n_actions=2, epsilon=1.0) 73 | policy.action_counts = np.array([3, 3]) 74 | policy.reward_counts = np.array([3, 0]) 75 | selected_action = [policy.select_action() for _ in range(trial_num)] 76 | assert 0 < sum(selected_action)[0] < trial_num 77 | 78 | 79 | def test_egreedy_update_params(): 80 | policy = EpsilonGreedy(n_actions=2, epsilon=1.0) 81 | policy.action_counts_temp = np.array([4, 3]) 82 | policy.action_counts = np.copy(policy.action_counts_temp) 83 | policy.reward_counts_temp = np.array([2.0, 0.0]) 84 | policy.reward_counts = np.copy(policy.reward_counts_temp) 85 | action = 0 86 | reward = 1.0 87 | policy.update_params(action, reward) 88 | assert np.array_equal(policy.action_counts, np.array([5, 3])) 89 | assert np.allclose(policy.reward_counts, np.array([2.0 + reward, 0.0])) 90 | 91 | 92 | def test_random_compute_batch_action_dist(): 93 | n_actions = 10 94 | len_list = 5 95 | n_rounds = 100 96 | policy = Random(n_actions=n_actions, len_list=len_list) 97 | action_dist = policy.compute_batch_action_dist(n_rounds=n_rounds) 98 | assert action_dist.shape[0] == n_rounds 99 | assert action_dist.shape[1] == n_actions 100 | assert action_dist.shape[2] == len_list 101 | assert len(np.unique(action_dist)) == 1 102 | assert np.unique(action_dist)[0] == 1 / n_actions 103 | 104 | 105 | def test_bernoulli_ts_zozotown_prior(): 106 | 107 | with pytest.raises(Exception): 108 | BernoulliTS(n_actions=2, is_zozotown_prior=True) 109 | 110 | policy_all = BernoulliTS(n_actions=2, is_zozotown_prior=True, campaign="all") 111 | # check whether it is not a non-informative prior parameter (i.e., default parameter) 112 | assert len(np.unique(policy_all.alpha)) != 1 113 | assert len(np.unique(policy_all.beta)) != 1 114 | 115 | policy_men = BernoulliTS(n_actions=2, is_zozotown_prior=True, campaign="men") 116 | assert len(np.unique(policy_men.alpha)) != 1 117 | assert len(np.unique(policy_men.beta)) != 1 118 | 119 | policy_women = BernoulliTS(n_actions=2, is_zozotown_prior=True, campaign="women") 120 | assert len(np.unique(policy_women.alpha)) != 1 121 | assert len(np.unique(policy_women.beta)) != 1 122 | 123 | 124 | def test_bernoulli_ts_select_action(): 125 | # invalid relationship between n_actions and len_list 126 | with pytest.raises(ValueError): 127 | BernoulliTS(n_actions=5, len_list=10) 128 | 129 | with pytest.raises(ValueError): 130 | BernoulliTS(n_actions=2, len_list=3) 131 | 132 | policy1 = BernoulliTS(n_actions=3, len_list=3) 133 | assert np.allclose(np.sort(policy1.select_action()), np.array([0, 1, 2])) 134 | 135 | policy = BernoulliTS(n_actions=5, len_list=3) 136 | assert len(policy.select_action()) == 3 137 | 138 | 139 | def test_bernoulli_ts_update_params(): 140 | policy = BernoulliTS(n_actions=2) 141 | policy.action_counts_temp = np.array([4,
3]) 142 | policy.action_counts = np.copy(policy.action_counts_temp) 143 | policy.reward_counts_temp = np.array([2.0, 0.0]) 144 | policy.reward_counts = np.copy(policy.reward_counts_temp) 145 | action = 0 146 | reward = 1.0 147 | policy.update_params(action, reward) 148 | assert np.array_equal(policy.action_counts, np.array([5, 3])) 149 | # in bernoulli ts, reward_counts is defined as the sum of observed rewards for each action 150 | next_reward = 2.0 + reward 151 | assert np.allclose(policy.reward_counts, np.array([next_reward, 0.0])) 152 | 153 | 154 | def test_bernoulli_ts_compute_batch_action_dist(): 155 | n_rounds = 10 156 | n_actions = 5 157 | len_list = 2 158 | policy = BernoulliTS(n_actions=n_actions, len_list=len_list) 159 | action_dist = policy.compute_batch_action_dist(n_rounds=n_rounds, n_sim=30) 160 | assert action_dist.shape[0] == n_rounds 161 | assert action_dist.shape[1] == n_actions 162 | assert action_dist.shape[2] == len_list 163 | -------------------------------------------------------------------------------- /tests/policy/test_logistic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from obp.policy.logistic import LogisticEpsilonGreedy 5 | from obp.policy.logistic import LogisticTS 6 | from obp.policy.logistic import LogisticUCB 7 | from obp.policy.logistic import MiniBatchLogisticRegression 8 | 9 | 10 | def test_logistic_base_exception(): 11 | # invalid dim 12 | with pytest.raises(ValueError): 13 | LogisticEpsilonGreedy(n_actions=2, dim=-3) 14 | 15 | with pytest.raises(ValueError): 16 | LogisticEpsilonGreedy(n_actions=2, dim=0) 17 | 18 | with pytest.raises(TypeError): 19 | LogisticEpsilonGreedy(n_actions=2, dim="3") 20 | 21 | # invalid n_actions 22 | with pytest.raises(ValueError): 23 | LogisticEpsilonGreedy(n_actions=-3, dim=2) 24 | 25 | with pytest.raises(ValueError): 26 | LogisticEpsilonGreedy(n_actions=1, dim=2) 27 | 28 | with pytest.raises(TypeError): 29 | LogisticEpsilonGreedy(n_actions="2", dim=2) 30 | 31 | # invalid len_list 32 | with pytest.raises(ValueError): 33 | LogisticEpsilonGreedy(n_actions=2, dim=2, len_list=-3) 34 | 35 | with pytest.raises(ValueError): 36 | LogisticEpsilonGreedy(n_actions=2, dim=2, len_list=0) 37 | 38 | with pytest.raises(TypeError): 39 | LogisticEpsilonGreedy(n_actions=2, dim=2, len_list="3") 40 | 41 | # invalid batch_size 42 | with pytest.raises(ValueError): 43 | LogisticEpsilonGreedy(n_actions=2, dim=2, batch_size=-2) 44 | 45 | with pytest.raises(ValueError): 46 | LogisticEpsilonGreedy(n_actions=2, dim=2, batch_size=0) 47 | 48 | with pytest.raises(TypeError): 49 | LogisticEpsilonGreedy(n_actions=2, dim=2, batch_size="10") 50 | 51 | # invalid relationship between n_actions and len_list 52 | with pytest.raises(ValueError): 53 | LogisticEpsilonGreedy(n_actions=5, len_list=10, dim=2) 54 | 55 | with pytest.raises(ValueError): 56 | LogisticEpsilonGreedy(n_actions=2, len_list=3, dim=2) 57 | 58 | 59 | def test_logistic_epsilon_normal_epsilon(): 60 | 61 | policy1 = LogisticEpsilonGreedy(n_actions=2, dim=2) 62 | assert 0 <= policy1.epsilon <= 1 63 | 64 | policy2 = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=0.5) 65 | assert policy2.epsilon == 0.5 66 | 67 | 68 | def test_logistic_epsilon_abnormal_epsilon(): 69 | 70 | with pytest.raises(ValueError): 71 | LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=1.3) 72 | 73 | with pytest.raises(ValueError): 74 | LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=-0.3) 75 | 76 | 77 | def 
test_logistic_epsilon_each_action_model(): 78 | n_actions = 3 79 | policy = LogisticEpsilonGreedy(n_actions=n_actions, dim=2, epsilon=0.5) 80 | for i in range(n_actions): 81 | assert isinstance(policy.model_list[i], MiniBatchLogisticRegression) 82 | 83 | 84 | def test_logistic_epsilon_select_action_exploitation(): 85 | trial_num = 50 86 | policy = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=0.0) 87 | context = np.array([1.0, 1.0]).reshape(1, -1) 88 | policy.update_params(action=0, reward=1.0, context=context) 89 | policy.update_params(action=0, reward=1.0, context=context) 90 | policy.update_params(action=1, reward=1.0, context=context) 91 | policy.update_params(action=1, reward=0.0, context=context) 92 | for _ in range(trial_num): 93 | assert policy.select_action(context=context)[0] == 0 94 | 95 | 96 | def test_logistic_epsilon_select_action_exploration(): 97 | trial_num = 50 98 | policy = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=1.0) 99 | context = np.array([1.0, 1.0]).reshape(1, -1) 100 | policy.update_params(action=0, reward=1.0, context=context) 101 | policy.update_params(action=0, reward=1.0, context=context) 102 | policy.update_params(action=1, reward=1.0, context=context) 103 | policy.update_params(action=1, reward=0.0, context=context) 104 | selected_action = [policy.select_action(context=context) for _ in range(trial_num)] 105 | assert 0 < sum(selected_action)[0] < trial_num 106 | 107 | 108 | def test_logistic_ucb_initialize(): 109 | # note that the meaning of epsilon is different from that of LogisticEpsilonGreedy 110 | with pytest.raises(ValueError): 111 | LogisticUCB(n_actions=2, dim=2, epsilon=-0.2) 112 | 113 | n_actions = 3 114 | policy = LogisticUCB(n_actions=n_actions, dim=2, epsilon=0.5) 115 | for i in range(n_actions): 116 | assert isinstance(policy.model_list[i], MiniBatchLogisticRegression) 117 | 118 | 119 | def test_logistic_ucb_select_action(): 120 | dim = 3 121 | len_list = 2 122 | policy = LogisticUCB(n_actions=4, dim=dim, len_list=2, epsilon=0.0) 123 | context = np.ones(dim).reshape(1, -1) 124 | action = policy.select_action(context=context) 125 | assert len(action) == len_list 126 | 127 | 128 | def test_logistic_ts_initialize(): 129 | n_actions = 3 130 | policy = LogisticTS(n_actions=n_actions, dim=2) 131 | for i in range(n_actions): 132 | assert isinstance(policy.model_list[i], MiniBatchLogisticRegression) 133 | 134 | 135 | def test_logistic_ts_select_action(): 136 | dim = 3 137 | len_list = 2 138 | policy = LogisticTS(n_actions=4, dim=dim, len_list=2) 139 | context = np.ones(dim).reshape(1, -1) 140 | action = policy.select_action(context=context) 141 | assert len(action) == len_list 142 | -------------------------------------------------------------------------------- /tests/policy/test_offline_learner_continuous_performance.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | from typing import Tuple 4 | from typing import Union 5 | 6 | from joblib import delayed 7 | from joblib import Parallel 8 | import numpy as np 9 | import pytest 10 | 11 | from obp.dataset import linear_behavior_policy_continuous 12 | from obp.dataset import linear_reward_funcion_continuous 13 | from obp.dataset import SyntheticContinuousBanditDataset 14 | from obp.policy import BaseContinuousOfflinePolicyLearner 15 | from obp.policy import ContinuousNNPolicyLearner 16 | 17 | 18 | # n_rounds, dim_context, action_noise, reward_noise, min_action_value, max_action_value, 
pg_method, bandwidth 19 | offline_experiment_configurations = [ 20 | ( 21 | 1500, 22 | 10, 23 | 1.0, 24 | 1.0, 25 | -10.0, 26 | 10.0, 27 | "dpg", 28 | None, 29 | ), 30 | ( 31 | 2000, 32 | 5, 33 | 1.0, 34 | 1.0, 35 | 0.0, 36 | 100.0, 37 | "dpg", 38 | None, 39 | ), 40 | ] 41 | 42 | 43 | @dataclass 44 | class RandomPolicy(BaseContinuousOfflinePolicyLearner): 45 | output_space: Tuple[Union[int, float], Union[int, float]] = None 46 | 47 | def fit(self): 48 | raise NotImplementedError 49 | 50 | def predict(self, context: np.ndarray) -> np.ndarray: 51 | 52 | n_rounds = context.shape[0] 53 | predicted_actions = np.random.uniform( 54 | self.output_space[0], self.output_space[1], size=n_rounds 55 | ) 56 | return predicted_actions 57 | 58 | 59 | @pytest.mark.parametrize( 60 | "n_rounds, dim_context, action_noise, reward_noise, min_action_value, max_action_value, pg_method, bandwidth", 61 | offline_experiment_configurations, 62 | ) 63 | def test_offline_nn_policy_learner_performance( 64 | n_rounds: int, 65 | dim_context: int, 66 | action_noise: float, 67 | reward_noise: float, 68 | min_action_value: float, 69 | max_action_value: float, 70 | pg_method: str, 71 | bandwidth: Optional[float], 72 | ) -> None: 73 | def process(i: int): 74 | # synthetic data generator 75 | dataset = SyntheticContinuousBanditDataset( 76 | dim_context=dim_context, 77 | action_noise=action_noise, 78 | reward_noise=reward_noise, 79 | min_action_value=min_action_value, 80 | max_action_value=max_action_value, 81 | reward_function=linear_reward_funcion_continuous, 82 | behavior_policy_function=linear_behavior_policy_continuous, 83 | random_state=i, 84 | ) 85 | # define evaluation policy using NNPolicyLearner 86 | nn_policy = ContinuousNNPolicyLearner( 87 | dim_context=dim_context, 88 | pg_method=pg_method, 89 | bandwidth=bandwidth, 90 | output_space=(min_action_value, max_action_value), 91 | hidden_layer_size=(10, 10), 92 | learning_rate_init=0.001, 93 | max_iter=200, 94 | solver="sgd", 95 | q_func_estimator_hyperparams={"max_iter": 200}, 96 | ) 97 | # baseline method 1. 
RandomPolicy 98 | random_policy = RandomPolicy(output_space=(min_action_value, max_action_value)) 99 | # sample new training and test sets of synthetic logged bandit data 100 | bandit_feedback_train = dataset.obtain_batch_bandit_feedback( 101 | n_rounds=n_rounds, 102 | ) 103 | bandit_feedback_test = dataset.obtain_batch_bandit_feedback( 104 | n_rounds=n_rounds, 105 | ) 106 | # train the evaluation policy on the training set of the synthetic logged bandit data 107 | nn_policy.fit( 108 | context=bandit_feedback_train["context"], 109 | action=bandit_feedback_train["action"], 110 | reward=bandit_feedback_train["reward"], 111 | pscore=bandit_feedback_train["pscore"], 112 | ) 113 | # predict the action decisions for the test set of the synthetic logged bandit data 114 | actions_predicted_by_nn_policy = nn_policy.predict( 115 | context=bandit_feedback_test["context"], 116 | ) 117 | actions_predicted_by_random = random_policy.predict( 118 | context=bandit_feedback_test["context"], 119 | ) 120 | # get the ground truth policy value for each learner 121 | gt_nn_policy_learner = dataset.calc_ground_truth_policy_value( 122 | context=bandit_feedback_test["context"], 123 | action=actions_predicted_by_nn_policy, 124 | ) 125 | gt_random_policy = dataset.calc_ground_truth_policy_value( 126 | context=bandit_feedback_test["context"], 127 | action=actions_predicted_by_random, 128 | ) 129 | 130 | return gt_nn_policy_learner, gt_random_policy 131 | 132 | n_runs = 10 133 | processed = Parallel( 134 | n_jobs=1, # PyTorch uses multiple threads 135 | verbose=0, 136 | )([delayed(process)(i) for i in np.arange(n_runs)]) 137 | list_gt_nn_policy, list_gt_random = [], [] 138 | for i, ground_truth_policy_values in enumerate(processed): 139 | gt_nn_policy, gt_random = ground_truth_policy_values 140 | list_gt_nn_policy.append(gt_nn_policy) 141 | list_gt_random.append(gt_random) 142 | 143 | assert np.mean(list_gt_nn_policy) > np.mean(list_gt_random) 144 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from obp.utils import sample_action_fast 4 | from obp.utils import softmax 5 | 6 | 7 | def test_sample_action_fast(): 8 | n_rounds = 10 9 | n_actions = 5 10 | n_sim = 100000 11 | 12 | true_probs = softmax(np.random.normal(size=(n_rounds, n_actions))) 13 | sampled_action_list = list() 14 | for _ in np.arange(n_sim): 15 | sampled_action_list.append(sample_action_fast(true_probs)[:, np.newaxis]) 16 | 17 | sampled_action_arr = np.concatenate(sampled_action_list, 1) 18 | for i in np.arange(n_rounds): 19 | sampled_action_counts = np.unique(sampled_action_arr[i], return_counts=True)[1] 20 | empirical_probs = sampled_action_counts / n_sim 21 | assert np.isclose(true_probs[i], empirical_probs, rtol=5e-2, atol=1e-3).all() 22 | --------------------------------------------------------------------------------
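The utilities exercised by the final test above can be sketched as follows (a minimal, self-contained example; the array sizes are illustrative only):

    import numpy as np
    from obp.utils import sample_action_fast, softmax

    # (n_rounds, n_actions) action-choice probabilities that sum to one per round
    probs = softmax(np.random.normal(size=(10, 5)))
    # one sampled action index per round, shape (n_rounds,)
    actions = sample_action_fast(probs)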