├── .Rbuildignore
├── .gitattributes
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
    ├── agent.R
    ├── bandit.R
    ├── bandit_basic_bernoulli.R
    ├── bandit_basic_gaussian.R
    ├── bandit_cmab_bernoulli.R
    ├── bandit_cmab_binary.R
    ├── bandit_cmab_hybrid.R
    ├── bandit_cmab_linear.R
    ├── bandit_cmab_logit.R
    ├── bandit_cmab_precaching.R
    ├── bandit_cmab_wheel.R
    ├── bandit_continuum_function.R
    ├── bandit_offline_bootstrapped_replay.R
    ├── bandit_offline_direct_method.R
    ├── bandit_offline_doubly_robust.R
    ├── bandit_offline_propensity_weighting.R
    ├── bandit_offline_replay_evaluator.R
    ├── bandit_offline_replay_evaluator_lookup.R
    ├── functions_generic.R
    ├── functions_utility.R
    ├── history.R
    ├── plot.R
    ├── policy.R
    ├── policy_cmab_lin_epoch_greedy.R
    ├── policy_cmab_lin_epsilon_greedy.R
    ├── policy_cmab_lin_ts_disjoint.R
    ├── policy_cmab_lin_ucb_disjoint.R
    ├── policy_cmab_lin_ucb_disjoint_optimized.R
    ├── policy_cmab_lin_ucb_general.R
    ├── policy_cmab_lin_ucb_hybrid.R
    ├── policy_cmab_lin_ucb_hybrid_optimized.R
    ├── policy_cmab_logit_ts_bootstrap.R
    ├── policy_cmab_probit_ts.R
    ├── policy_cont_lif.R
    ├── policy_fixed.R
    ├── policy_mab_epsilon_first.R
    ├── policy_mab_epsilon_greedy.R
    ├── policy_mab_exp3.R
    ├── policy_mab_gittins_bl.R
    ├── policy_mab_gradient.R
    ├── policy_mab_softmax.R
    ├── policy_mab_ts.R
    ├── policy_mab_ts_bootstrap.R
    ├── policy_mab_ucb1.R
    ├── policy_mab_ucb2.R
    ├── policy_oracle.R
    ├── policy_random.R
    └── simulator.R
├── README.md
├── _pkgdown.yml
├── appveyor.yml
├── codecov.yml
├── contextual.Rproj
├── cran-comments.md
├── demo
    ├── 00Index
    ├── alternative_parallel_backends
    │   ├── azure
    │   │   ├── cluster.json
    │   │   ├── credentials-sample.json
    │   │   ├── demo_azure.R
    │   │   ├── simulator_azure.R
    │   │   └── test_azure_connection.R
    │   ├── redis
    │   │   ├── demo_redis.R
    │   │   └── simulator_redis.R
    │   └── rmpi
    │   │   ├── demo_rmpi.R
    │   │   ├── simulator_rmpi.R
    │   │   └── test_rmpi_connection.R
    ├── demo_bandit_algorithms_for_website_optimization.R
    ├── demo_cmab_policy_comparison_linear_bandit.R
    ├── demo_cmab_policy_comparison_weight_bandit.R
    ├── demo_epsilon_greedy_policy.R
    ├── demo_epsilon_greedy_to_epoch_greedy_policy.R
    ├── demo_lif_bandit.R
    ├── demo_mab_policy_comparison.R
    ├── demo_offline_cmab_alpha_linucb_direct_method.R
    ├── demo_offline_cmab_alpha_linucb_replay.R
    ├── demo_simpsons_paradox_propensity.R
    ├── demo_sine_bandit.R
    ├── demo_subsubclass.R
    ├── demo_sutton_barto.R
    ├── evaluations_on_public_datasets
    │   ├── demo_carskit_depaul.R
    │   ├── demo_movielens_100k.R
    │   └── demo_movielens_10m.R
    ├── offline_bandit_evaluations
    │   ├── demo_offline_bootstrap_replay.R
    │   ├── demo_offline_direct_method.R
    │   ├── demo_offline_doubly_robust.R
    │   └── demo_offline_propensity_score.R
    ├── replication_eckles_kaptein_2014
    │   ├── demo_bootstrap_fig_2.R
    │   └── demo_bootstrap_fig_3.R
    ├── replication_kruijswijk_2018
    │   ├── 1_basic_synthetic_evaluation.R
    │   ├── 2a_main_synthetic_evaluation.R
    │   ├── 2b_dependent_observations_plot_bar.R
    │   ├── 3_offline_bootstrapped_persuasion.R
    │   ├── README.md
    │   ├── bandit_bernoulli.R
    │   ├── bandit_bootstrapped_replay.R
    │   ├── bandit_replay.R
    │   ├── beta_binom_hier_model.stan
    │   ├── policy_pooled_egreedy.R
    │   ├── policy_pooled_thompson.R
    │   └── policy_pooled_ucb.R
    ├── replication_kruijswijk_2019
    │   ├── README.md
    │   ├── bandit_continuum_function_bimodal.R
    │   ├── bandit_continuum_function_unimodal.R
    │   ├── bandit_continuum_offon.R
    │   ├── bandit_continuum_offon_kern.R
    │   ├── demo_lif_bandit.R
    │   ├── demo_tbl_bandit.R
    │   ├── policy_cont_lif_randstart.R
    │   └── policy_tbl.R
    ├── replication_li_2010
    │   ├── 1_import_yahoo_to_monetdb.R
    │   ├── 2_run_simulation.R
    │   ├── 3_plotter.R
    │   ├── 4_plotter.R
    │   ├── alternative_db_scripts
    │   │   ├── 1_import_yahoo_data_to_monetdb_lite.R
    │   │   ├── 2_run_the_simulation_on_monetdb.R
    │   │   ├── 2_run_the_simulation_on_monetdb_lite.R
    │   │   ├── yahoo_to_mysql.R
    │   │   ├── yahoo_to_postgresql.R
    │   │   └── yahoo_to_sqlite.R
    │   ├── demo_yahoo_classes
    │   │   ├── yahoo_bandit.R
    │   │   ├── yahoo_policy_epsilon_greedy.R
    │   │   ├── yahoo_policy_epsilon_greedy_seg.R
    │   │   ├── yahoo_policy_linucb_disjoint.R
    │   │   ├── yahoo_policy_linucb_hybrid.R
    │   │   ├── yahoo_policy_random.R
    │   │   ├── yahoo_policy_ucb1_alpha.R
    │   │   └── yahoo_policy_ucb1_alpha_seg.R
    │   └── demo_yahoo_exploration
    │   │   ├── exploration.R
    │   │   └── plots.R
    └── replication_van_emden_2018
    │   ├── section_2_3.R
    │   ├── section_3_2_1.R
    │   ├── section_3_2_2.R
    │   ├── section_4_2_plot.R
    │   ├── section_5_2.R
    │   ├── section_5_3.R
    │   ├── section_5_4.R
    │   ├── section_6.R
    │   ├── section_7.R
    │   └── section_8.R
├── docs
    ├── 404.html
    ├── LICENSE-text.html
    ├── LICENSE.html
    ├── README.html
    ├── articles
    │   ├── _only_pkgdown_
    │   │   └── faq.html
    │   ├── arxiv_2018
    │   │   ├── fig
    │   │   │   ├── all_cmab_phases_Part1.pdf
    │   │   │   ├── all_cmab_phases_Part2.pdf
    │   │   │   ├── all_cmab_phases_Part3.pdf
    │   │   │   ├── all_cmab_phases_Part4.pdf
    │   │   │   ├── all_cmab_phases_Part5.pdf
    │   │   │   ├── all_cmab_phases_Part6.pdf
    │   │   │   ├── all_cmab_phases_Part7.pdf
    │   │   │   ├── all_cmab_phases_Part8.pdf
    │   │   │   ├── cmab_chart.pdf
    │   │   │   ├── contextual_class.pdf
    │   │   │   ├── contextual_sequence.pdf
    │   │   │   ├── offline_bandit.pdf
    │   │   │   ├── section_2_3.pdf
    │   │   │   ├── section_3_2_1.pdf
    │   │   │   ├── section_3_2_2.pdf
    │   │   │   ├── section_4_2_plot.pdf
    │   │   │   ├── section_5_2.pdf
    │   │   │   ├── section_5_3.pdf
    │   │   │   ├── section_5_4.pdf
    │   │   │   ├── section_5_5.pdf
    │   │   │   ├── section_8_bar.pdf
    │   │   │   └── section_8_plot.pdf
    │   │   ├── jss.aux
    │   │   ├── jss.bbl
    │   │   ├── jss.bst
    │   │   ├── jss.cls
    │   │   ├── jss.out
    │   │   ├── jss.pdf
    │   │   ├── jss.synctex.gz
    │   │   └── jsslogo.jpg
    │   ├── bandit_algorithms_for_website_optimization.html
    │   ├── basic_epsilon_greedy.jpeg
    │   ├── basic_epsilon_greedy.jpg
    │   ├── basic_epsilon_greedy.png
    │   ├── carskit_depaul.jpeg
    │   ├── carskit_depaul.jpg
    │   ├── carskit_depaul.png
    │   ├── cmabs.html
    │   ├── cmabs.jpeg
    │   ├── cmabs.jpg
    │   ├── cmabs.png
    │   ├── cmabsoffline.html
    │   ├── compare.png
    │   ├── contextual-fig-1.jpg
    │   ├── contextual-fig-1.png
    │   ├── contextual-fig-2.jpg
    │   ├── contextual-fig-2.png
    │   ├── eckles_kaptein.html
    │   ├── eckles_kaptein_1.jpg
    │   ├── eckles_kaptein_1.png
    │   ├── eg_average_reward.jpeg
    │   ├── eg_average_reward.jpg
    │   ├── eg_average_reward.png
    │   ├── eg_cumulative_reward.jpeg
    │   ├── eg_cumulative_reward.jpg
    │   ├── eg_cumulative_reward.png
    │   ├── eg_incorrect.jpeg
    │   ├── eg_incorrect.jpg
    │   ├── eg_incorrect.png
    │   ├── eg_optimal_action.jpeg
    │   ├── eg_optimal_action.jpg
    │   ├── eg_optimal_action.png
    │   ├── epsilongreedy.html
    │   ├── index.html
    │   ├── introduction.html
    │   ├── linucboffline.jpeg
    │   ├── linucboffline.jpg
    │   ├── linucboffline.png
    │   ├── mabs.html
    │   ├── mabs.jpeg
    │   ├── mabs.jpg
    │   ├── mabs.png
    │   ├── ml10m.html
    │   ├── ml10m.jpg
    │   ├── ml10m.png
    │   ├── offline_depaul_movies.html
    │   ├── only_pkgdown
    │   │   └── faq.html
    │   ├── replication-fig-1.jpg
    │   ├── replication-fig-1.png
    │   ├── replication-fig-2.jpg
    │   ├── replication-fig-2.png
    │   ├── replication.html
    │   ├── simpsons.html
    │   ├── softmax_average_reward.jpeg
    │   ├── softmax_average_reward.jpg
    │   ├── softmax_average_reward.png
    │   ├── softmax_cumulative_reward.jpeg
    │   ├── softmax_cumulative_reward.jpg
    │   ├── softmax_cumulative_reward.png
    │   ├── softmax_optimal_action.jpeg
    │   ├── softmax_optimal_action.jpg
    │   ├── softmax_optimal_action.png
    │   ├── sutton_barto.html
    │   ├── sutton_eg_1.jpeg
    │   ├── sutton_eg_1.jpg
    │   ├── sutton_eg_1.png
    │   ├── sutton_eg_2.jpeg
    │   ├── sutton_eg_2.jpg
    │   ├── sutton_eg_2.png
    │   ├── sutton_gradient.jpeg
    │   ├── sutton_gradient.jpg
    │   ├── sutton_gradient.png
    │   ├── sutton_optimistic.jpeg
    │   ├── sutton_optimistic.jpg
    │   ├── sutton_optimistic.png
    │   ├── sutton_ucb.jpeg
    │   ├── sutton_ucb.jpg
    │   ├── sutton_ucb.png
    │   ├── sutton_violin.jpeg
    │   ├── sutton_violin.jpg
    │   ├── sutton_violin.png
    │   ├── ucb_average_reward.jpeg
    │   ├── ucb_average_reward.jpg
    │   ├── ucb_average_reward.png
    │   ├── ucb_cumulative_reward.jpeg
    │   ├── ucb_cumulative_reward.jpg
    │   ├── ucb_cumulative_reward.png
    │   ├── ucb_optimal_action.jpeg
    │   ├── ucb_optimal_action.jpg
    │   ├── ucb_optimal_action.png
    │   └── website_optimization.html
    ├── authors.html
    ├── bootstrap-toc.css
    ├── bootstrap-toc.js
    ├── docsearch.css
    ├── docsearch.js
    ├── docsearch.json
    ├── favicon.ico
    ├── index.html
    ├── jquery.sticky-kit.min.js
    ├── link.svg
    ├── news
    │   └── index.html
    ├── pkgdown.css
    ├── pkgdown.js
    ├── pkgdown.yml
    └── reference
    │   ├── Agent.html
    │   ├── Bandit.html
    │   ├── BasicBernoulliBandit.html
    │   ├── BasicGaussianBandit.html
    │   ├── BootstrapTSPolicy.html
    │   ├── ContextualBasicBandit.html
    │   ├── ContextualBernoulliBandit.html
    │   ├── ContextualBernoulliPrecachingBandit.html
    │   ├── ContextualBinaryBandit.html
    │   ├── ContextualEpochGreedyPolicy.html
    │   ├── ContextualEpsilonGreedy.html
    │   ├── ContextualEpsilonGreedyPolicy.html
    │   ├── ContextualHybridBandit.html
    │   ├── ContextualLinTSPolicy.html
    │   ├── ContextualLinearBandit.html
    │   ├── ContextualLogitBTSPolicy.html
    │   ├── ContextualLogitBandit.html
    │   ├── ContextualPrecachingBandit.html
    │   ├── ContextualTSProbitPolicy.html
    │   ├── ContextualThompsonSamplingPolicy.html
    │   ├── ContextualWheelBandit.html
    │   ├── ContinuumBandit.html
    │   ├── EpsilonFirstPolicy-1.png
    │   ├── EpsilonFirstPolicy-2.png
    │   ├── EpsilonFirstPolicy.html
    │   ├── EpsilonGreedyPolicy-1.png
    │   ├── EpsilonGreedyPolicy-2.png
    │   ├── EpsilonGreedyPolicy.html
    │   ├── Exp3Policy-1.png
    │   ├── Exp3Policy-2.png
    │   ├── Exp3Policy.html
    │   ├── FixedPolicy.html
    │   ├── GittinsBrezziLaiPolicy.html
    │   ├── GlmUCBPolicy.html
    │   ├── GradientPolicy-1.png
    │   ├── GradientPolicy-2.png
    │   ├── GradientPolicy.html
    │   ├── History.html
    │   ├── LifPolicy.html
    │   ├── LinUCBDisjointOptimizedPolicy.html
    │   ├── LinUCBDisjointPolicy.html
    │   ├── LinUCBGeneralPolicy.html
    │   ├── LinUCBHybridOptimizedPolicy.html
    │   ├── LinUCBHybridPolicy.html
    │   ├── OfflineBootstrappedReplayBandit.html
    │   ├── OfflineDirectMethodBandit.html
    │   ├── OfflineDoublyRobustBandit.html
    │   ├── OfflineLookupReplayEvaluatorBandit-TODO-colon-Needs-to-be-documented-more-fully..html
    │   ├── OfflineLookupReplayEvaluatorBandit.html
    │   ├── OfflinePolicyEvaluatorBandit.html
    │   ├── OfflinePropensityWeightingBandit.html
    │   ├── OfflineReplayEvaluatorBandit.html
    │   ├── OraclePolicy.html
    │   ├── Plot.html
    │   ├── Policy.html
    │   ├── RandomPolicy-1.png
    │   ├── RandomPolicy.html
    │   ├── Simulator.html
    │   ├── SoftmaxPolicy-1.png
    │   ├── SoftmaxPolicy-2.png
    │   ├── SoftmaxPolicy.html
    │   ├── ThompsonSamplingPolicy-1.png
    │   ├── ThompsonSamplingPolicy.html
    │   ├── UCB1Policy-1.png
    │   ├── UCB1Policy-2.png
    │   ├── UCB1Policy.html
    │   ├── UCB2Policy-1.png
    │   ├── UCB2Policy-2.png
    │   ├── UCB2Policy.html
    │   ├── ci_boot.html
    │   ├── clip.html
    │   ├── clipr.html
    │   ├── data_table_factors_to_numeric.html
    │   ├── dec-set.html
    │   ├── figures
    │       ├── 1simulator.jpeg
    │       ├── 2agent.jpeg
    │       ├── 3abandit.jpeg
    │       ├── 3bpolicy.jpeg
    │       ├── 3cbandit.jpeg
    │       ├── 3dpolicy.jpeg
    │       ├── algoepsilonfirst.jpg
    │       ├── cmab_all.jpeg
    │       ├── cmab_all_large.jpg
    │       └── cmab_all_medium.jpg
    │   ├── formatted_difftime.html
    │   ├── get_arm_context.html
    │   ├── get_full_context.html
    │   ├── get_global_seed.html
    │   ├── inc-set.html
    │   ├── ind.html
    │   ├── index.html
    │   ├── inv.html
    │   ├── inv_logit.html
    │   ├── invgamma-1.png
    │   ├── invgamma.html
    │   ├── invlogit.html
    │   ├── is_rstudio.html
    │   ├── max_in.html
    │   ├── mvrnorm.html
    │   ├── one_hot.html
    │   ├── ones_in_zeroes.html
    │   ├── plot.history.html
    │   ├── print.history.html
    │   ├── prob_winner.html
    │   ├── sample_one_of.html
    │   ├── set_external.html
    │   ├── set_global_seed.html
    │   ├── sherman_morrisson.html
    │   ├── sim_post.html
    │   ├── sum_of.html
    │   ├── summary.history.html
    │   ├── value_remaining-1.png
    │   ├── value_remaining.html
    │   ├── var_welford.html
    │   ├── which_max_list.html
    │   └── which_max_tied.html
├── man
    ├── Agent.Rd
    ├── Bandit.Rd
    ├── BasicBernoulliBandit.Rd
    ├── BasicGaussianBandit.Rd
    ├── BootstrapTSPolicy.Rd
    ├── ContextualBernoulliBandit.Rd
    ├── ContextualBinaryBandit.Rd
    ├── ContextualEpochGreedyPolicy.Rd
    ├── ContextualEpsilonGreedyPolicy.Rd
    ├── ContextualHybridBandit.Rd
    ├── ContextualLinTSPolicy.Rd
    ├── ContextualLinearBandit.Rd
    ├── ContextualLogitBTSPolicy.Rd
    ├── ContextualLogitBandit.Rd
    ├── ContextualPrecachingBandit.Rd
    ├── ContextualTSProbitPolicy.Rd
    ├── ContextualWheelBandit.Rd
    ├── ContinuumBandit.Rd
    ├── EpsilonFirstPolicy.Rd
    ├── EpsilonGreedyPolicy.Rd
    ├── Exp3Policy.Rd
    ├── FixedPolicy.Rd
    ├── GittinsBrezziLaiPolicy.Rd
    ├── GradientPolicy.Rd
    ├── History.Rd
    ├── LifPolicy.Rd
    ├── LinUCBDisjointOptimizedPolicy.Rd
    ├── LinUCBDisjointPolicy.Rd
    ├── LinUCBGeneralPolicy.Rd
    ├── LinUCBHybridOptimizedPolicy.Rd
    ├── LinUCBHybridPolicy.Rd
    ├── OfflineBootstrappedReplayBandit.Rd
    ├── OfflineDirectMethodBandit.Rd
    ├── OfflineDoublyRobustBandit.Rd
    ├── OfflineLookupReplayEvaluatorBandit.Rd
    ├── OfflinePropensityWeightingBandit.Rd
    ├── OfflineReplayEvaluatorBandit.Rd
    ├── OraclePolicy.Rd
    ├── Plot.Rd
    ├── Policy.Rd
    ├── RandomPolicy.Rd
    ├── Simulator.Rd
    ├── SoftmaxPolicy.Rd
    ├── ThompsonSamplingPolicy.Rd
    ├── UCB1Policy.Rd
    ├── UCB2Policy.Rd
    ├── clipr.Rd
    ├── data_table_factors_to_numeric.Rd
    ├── dec-set.Rd
    ├── figures
    │   ├── 1simulator.jpeg
    │   ├── 2agent.jpeg
    │   ├── 3abandit.jpeg
    │   ├── 3bpolicy.jpeg
    │   ├── 3cbandit.jpeg
    │   ├── 3dpolicy.jpeg
    │   ├── algoepsilonfirst.jpg
    │   ├── cmab_all.jpeg
    │   ├── cmab_all_large.jpg
    │   └── cmab_all_medium.jpg
    ├── formatted_difftime.Rd
    ├── get_arm_context.Rd
    ├── get_full_context.Rd
    ├── get_global_seed.Rd
    ├── inc-set.Rd
    ├── ind.Rd
    ├── inv.Rd
    ├── invgamma.Rd
    ├── invlogit.Rd
    ├── is_rstudio.Rd
    ├── mvrnorm.Rd
    ├── one_hot.Rd
    ├── ones_in_zeroes.Rd
    ├── plot.history.Rd
    ├── print.history.Rd
    ├── prob_winner.Rd
    ├── sample_one_of.Rd
    ├── set_external.Rd
    ├── set_global_seed.Rd
    ├── sherman_morrisson.Rd
    ├── sim_post.Rd
    ├── sum_of.Rd
    ├── summary.history.Rd
    ├── value_remaining.Rd
    ├── var_welford.Rd
    ├── which_max_list.Rd
    └── which_max_tied.Rd
├── tests
    ├── figs
    │   ├── deps.txt
    │   └── plot
    │   │   ├── arm-plot.svg
    │   │   ├── arms-color.svg
    │   │   ├── arms-lims.svg
    │   │   ├── average-regret-plot.svg
    │   │   ├── average-reward-plot.svg
    │   │   ├── basic-cumulative-plot.svg
    │   │   ├── color-and-lty-stepping.svg
    │   │   ├── cumulative-sd-plot.svg
    │   │   ├── cumulative-traces-plot.svg
    │   │   ├── legend-title-and-labels-plot.svg
    │   │   ├── limits-plot.svg
    │   │   ├── lwd-pot.svg
    │   │   ├── only-sd-plot.svg
    │   │   ├── plot-inc-var-no-color.svg
    │   │   ├── traces-alpha-and-max-plot.svg
    │   │   ├── traces-plot-smooth.svg
    │   │   └── ylim-plot.svg
    ├── testthat.R
    └── testthat
    │   ├── history_context_test.ref
    │   ├── history_context_theta_test.ref
    │   ├── history_test.ref
    │   ├── history_theta_test.ref
    │   ├── setup_tests.R
    │   ├── teardown_tests.R
    │   ├── test_agent.R
    │   ├── test_bandits.R
    │   ├── test_history.R
    │   ├── test_plot.R
    │   ├── test_policies.R
    │   ├── test_policy.R
    │   └── test_utility_functions.R
└── vignettes
    ├── 1.png
    ├── Rplot.png
    ├── basic_epsilon_greedy.png
    ├── carskit_depaul.png
    ├── cmabs.R
    ├── cmabs.Rmd
    ├── cmabs.png
    ├── cmabsoffline.R
    ├── cmabsoffline.Rmd
    ├── compare.png
    ├── contextual-fig-1.png
    ├── contextual-fig-2.png
    ├── eckles_kaptein.R
    ├── eckles_kaptein.Rmd
    ├── eckles_kaptein_0.png
    ├── eckles_kaptein_1.png
    ├── eg_average_reward.png
    ├── eg_cumulative_reward.png
    ├── eg_incorrect.png
    ├── eg_optimal_action.png
    ├── epsilongreedy.R
    ├── epsilongreedy.Rmd
    ├── introduction.R
    ├── introduction.Rmd
    ├── linucboffline.png
    ├── mabs.Rmd
    ├── mabs.png
    ├── ml10m.R
    ├── ml10m.Rmd
    ├── ml10m.png
    ├── offline_depaul_movies.R
    ├── offline_depaul_movies.Rmd
    ├── only_pkgdown
        ├── faq.Rmd
        └── faq.html
    ├── replication-fig-1.png
    ├── replication-fig-2.png
    ├── replication.R
    ├── replication.Rmd
    ├── simpsons.R
    ├── simpsons.Rmd
    ├── softmax_average_reward.png
    ├── softmax_cumulative_reward.png
    ├── softmax_optimal_action.png
    ├── sutton_barto.R
    ├── sutton_barto.Rmd
    ├── sutton_eg_1.png
    ├── sutton_eg_2.png
    ├── sutton_gradient.png
    ├── sutton_optimistic.png
    ├── sutton_ucb.png
    ├── sutton_violin.png
    ├── ucb_average_reward.png
    ├── ucb_cumulative_reward.png
    ├── ucb_optimal_action.png
    ├── website_optimization.R
    └── website_optimization.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^Meta$
 2 | ^doc$
 3 | ^vignettes/only_pkgdown$
 4 | ^vignettes/only_pkgdown.*
 5 | ^demo/working_directory$
 6 | ^demo/working_directory.*
 7 | contextual.Rproj
 8 | ^.*\.Rproj$
 9 | ^\.Rproj\.user$
10 | ^packrat/
11 | ^\.Rprofile$
12 | ^\.travis\.yml$
13 | ^codecov\.yml$
14 | ^appveyor\.yml$
15 | ^_pkgdown\.yml$
16 | ^progress\.txt$
17 | ^doparallel\.log$
18 | ^progress\.log$
19 | ^docs$
20 | ^vignettes/jss.*
21 | ^LICENSE\.md$
22 | ^credentials\.json$
23 | cran-comments.md
24 | ^revdep$
25 | 
26 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | data/* binary
4 | src/* text=lf
5 | R/* text=lf
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | Meta
 2 | doc
 3 | packrat/lib*/
 4 | inst/doc
 5 | tests/testthat/Rplots.pdf
 6 | yahoodb-journal
 7 | 
 8 | Thumbs.db
 9 | .DS_Store
10 | *tmp*
11 | credentials.json
12 | **/credentials.json
13 | **/doparallel.log
14 | **/progress.log
15 | *.Rda
16 | *.Rdata
17 | *.Rds
18 | *.log
19 | *.LOG
20 | docs/reference/RandomPolicy-1.png
21 | # Created by https://www.gitignore.io/api/r
22 | ### R ###
23 | # History files
24 | .Rhistory
25 | .Rapp.history
26 | # Session Data files
27 | .RData
28 | # Example code in package build process
29 | *-Ex.R
30 | # Output files from R CMD build
31 | /*.tar.gz
32 | # Output files from R CMD check
33 | /*.Rcheck/
34 | # RStudio files
35 | .Rproj.user/
36 | # produced vignettes
37 | vignettes/*.html
38 | vignettes/*.pdf
39 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
40 | .httr-oauth
41 | # knitr and R markdown default cache directories
42 | *_cache/
43 | cache/
44 | demo/working_directory
45 | demo/working_directory*
46 | # Temporary files created by R markdown
47 | *.utf8.md
48 | *.knit.md
49 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
50 | rsconnect/
51 | ### R.Bookdown Stack ###
52 | # R package: bookdown caching files
53 | *_files/
54 | *.orig
55 | .env
56 | *.tex
57 | *.csv
58 | /revdep/.cache.rds
59 | .Rproj.user
60 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: r
 2 | cache: packages
 3 | os:
 4 |   - linux
 5 |   - osx
 6 | r_packages:
 7 |   - covr
 8 | after_success:
 9 |   - Rscript -e 'library(covr); codecov()'
10 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: contextual
 2 | Type: Package
 3 | Title: Simulation and Analysis of Contextual Multi-Armed Bandit Policies
 4 | Version: 0.9.8.4
 5 | Authors@R: c(person("Robin", "van Emden", role = c("aut", "cre"), 
 6 |         comment = c(ORCID = "0000-0001-5820-8638"), email = "robinvanemden@gmail.com"),
 7 |     person("Maurits", "Kaptein", role = "ctb", email = "m.c.kaptein@tilburguniversity.edu", 
 8 |         comment = c(ORCID = "0000-0002-6316-7524")))
 9 | Maintainer: Robin van Emden <robinvanemden@gmail.com>
10 | Description: Facilitates the simulation and evaluation of context-free
11 |     and contextual multi-Armed Bandit policies or algorithms to ease the
12 |     implementation, evaluation, and dissemination of both existing and
13 |     new bandit algorithms and policies.
14 | License: GPL-3
15 | Encoding: UTF-8
16 | LazyData: true
17 | RoxygenNote: 7.1.1
18 | Depends:
19 |     R (>= 3.5.0)
20 | Imports:
21 |     R6 (>= 2.3.0),
22 |     data.table,
23 |     R.devices,
24 |     foreach,
25 |     doParallel,
26 |     itertools,
27 |     iterators,
28 |     Formula,
29 |     rjson
30 | Suggests:
31 |     testthat,
32 |     RCurl,
33 |     splitstackshape,
34 |     covr,
35 |     knitr,
36 |     here,
37 |     rmarkdown,
38 |     devtools,
39 |     ggplot2,
40 |     vdiffr
41 | VignetteBuilder: knitr
42 | URL: https://github.com/Nth-iteration-labs/contextual
43 | BugReports: https://github.com/Nth-iteration-labs/contextual/issues
44 | Roxygen: list(markdown = TRUE)
45 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | contextual 0.9.8.4
 2 | ==================
 3 | 
 4 | * Minor documentation updates.
 5 | * Fix for Exp3 bug (thanks, @leferrad)
 6 | * Cleanup of propensity score related code (thanks again, @leferrad)
 7 | * Updated tests.
 8 | 
 9 | 
10 | contextual 0.9.8.3
11 | ==================
12 | 
13 | * Tested and confirmed to be R 4.0.0 proof.
14 | * Minor documentation updates.
15 | * Now correctly restores global seed on completing a simulation (thanks, @pstansell)
16 | 
17 | 
18 | contextual 0.9.8.2
19 | ==================
20 | 
21 | * Minor documentation update
22 | * Minor refactoring: Private utility functions moved from the History to the Plot class.
23 | 
24 | contextual 0.9.8.1
25 | ==================
26 | 
27 | * Specified previous version of set.seed sampler with RNGversion() calls
28 | 
29 | contextual 0.9.8
30 | ================
31 | 
32 | * Major update
33 | * API change for offline Bandits
34 | * Fixes inverse propensity score weighting
35 | * Documentation updates
36 | * Additional demo scripts
37 | 
38 | contextual 0.9.1
39 | ================
40 | 
41 | * First CRAN release 
42 | 
43 | contextual 0.9.0
44 | ================
45 | 
46 | * CRAN Submission 
47 | 


--------------------------------------------------------------------------------
/R/policy_cmab_lin_epoch_greedy.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | ContextualEpochGreedyPolicy <- R6::R6Class(
 3 |   portable = FALSE,
 4 |   class = FALSE,
 5 |   inherit = Policy,
 6 |   public = list(
 7 |     sZl = NULL,
 8 |     exploration_phase = NULL,
 9 |     class_name = "ContextualEpochGreedyPolicy",
10 |     initialize = function(sZl = 10) {
11 |       super$initialize()
12 |       self$sZl <- sZl
13 |     },
14 |     set_parameters = function(context_params) {
15 |       d <- context_params$d
16 |       self$theta_to_arms <- list('A' = diag(1,d,d), 'b' = rep(0,d))
17 |     },
18 |     get_action = function(t, context) {
19 | 
20 |       if(t==1 || t%%self$sZl==0) self$exploration_phase = TRUE
21 | 
22 |       if (!isTRUE(self$exploration_phase)) {
23 |         expected_rewards <- rep(0.0, context$k)
24 |         for (arm in 1:context$k) {
25 |           Xa         <- get_arm_context(context, arm)
26 |           A          <- self$theta$A[[arm]]
27 |           b          <- self$theta$b[[arm]]
28 |           A_inv      <- inv(A)
29 |           theta_hat  <- A_inv %*% b
30 |           expected_rewards[arm] <- Xa %*% theta_hat
31 |         }
32 |         action$choice  <- which_max_tied(expected_rewards)
33 | 
34 |       } else {
35 |         self$action$choice        <- sample.int(context$k, 1, replace = TRUE)
36 |       }
37 |       action
38 |     },
39 |     set_reward = function(t, context, action, reward) {
40 |       arm    <- action$choice
41 |       reward <- reward$reward
42 |       Xa     <- get_arm_context(context, arm)
43 | 
44 |       if (isTRUE(self$exploration_phase)) {
45 |         inc(self$theta$A[[arm]]) <- outer(Xa, Xa)
46 |         inc(self$theta$b[[arm]]) <- reward * Xa
47 |         self$exploration_phase   <- FALSE
48 |       }
49 | 
50 |       self$theta
51 |     }
52 |   )
53 | )
54 | #' Policy: A Time and Space Efficient Algorithm for Contextual Linear Bandits
55 | #'
56 | #' @name ContextualEpochGreedyPolicy
57 | #'
58 | #'
59 | #' @section Usage:
60 | #' \preformatted{
61 | #'  policy <- ContextualEpochGreedyPolicy$new(sZl = 10)
62 | #' }
63 | #'
64 | #' @seealso
65 | #'
66 | #' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
67 | #' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
68 | #'
69 | #' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
70 | #' \code{\link{OfflineReplayEvaluatorBandit}}
71 | #'
72 | #' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
73 | NULL
74 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | # DO NOT CHANGE the "init" and "install" sections below
 2 | 
 3 | # Download script file from GitHub
 4 | init:
 5 |   ps: |
 6 |         $ErrorActionPreference = "Stop"
 7 |         Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
 8 |         Import-Module '..\appveyor-tool.ps1'
 9 | 
10 | install:
11 |   ps: Bootstrap
12 | 
13 | environment:
14 |   R_VERSION: stable
15 |   PKGTYPE: binary
16 |   USE_RTOOLS: true
17 |   R_REMOTES_STANDALONE: true
18 | 
19 | cache:
20 |   - C:\RLibrary
21 | 
22 | # Adapt as necessary starting from here
23 | 
24 | build_script:
25 |   - travis-tool.sh install_deps
26 | 
27 | test_script:
28 |   - travis-tool.sh run_tests
29 | 
30 | on_failure:
31 |   - 7z a failure.zip *.Rcheck\*
32 |   - appveyor PushArtifact failure.zip
33 | 
34 | artifacts:
35 |   - path: '*.Rcheck\**\*.log'
36 |     name: Logs
37 | 
38 |   - path: '*.Rcheck\**\*.out'
39 |     name: Logs
40 | 
41 |   - path: '*.Rcheck\**\*.fail'
42 |     name: Logs
43 | 
44 |   - path: '*.Rcheck\**\*.Rout'
45 |     name: Logs
46 | 
47 |   - path: '\*_*.tar.gz'
48 |     name: Bits
49 | 
50 |   - path: '\*_*.zip'
51 |     name: Bits
52 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |     patch:
10 |       default:
11 |         target: auto
12 |         threshold: 1%
13 | 
14 | language: R
15 | sudo: false
16 | cache: packages
17 | after_success:
18 |   - Rscript -e 'covr::codecov()'
19 | 


--------------------------------------------------------------------------------
/contextual.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: No
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageCheckArgs: --as-cran
22 | PackageRoxygenize: rd,collate,namespace,vignette
23 | 
24 | QuitChildProcessesOnExit: Yes
25 | DisableExecuteRprofile: Yes
26 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | * Minor documentation updates.
 2 | * Fix for Exp3 bug (thanks, @leferrad)
 3 | * Cleanup of propensity score related code (thanks again, @leferrad)
 4 | * Updated tests.
 5 | 
 6 | ## Test environments
 7 | 
 8 | * local Windows 10 (x64) release, R 4.0.2
 9 | * OS X install (on travis-ci) R-release
10 | * Ubuntu 12.04 (on travis-ci) R-release
11 | * Windows Server 2012 R2 x64 install (on appveyor), R 4.0.2
12 | * Rhub:
13 |   * Fedora Linux, R-devel, clang, gfortran
14 |   * Ubuntu Linux 16.04 LTS, R-release, GCC
15 |   * Windows Server 2008 R2 SP1, R-devel, 32/64 bit
16 | * win-builder (devel, oldrelease and release)
17 | 
18 | ## R CMD check results
19 | 
20 | ### Generally no errors, no warnings, no notes
21 | 
22 | ```
23 | 0 ERRORs | 0 WARNINGs | 0 NOTES.
24 | ```
25 | 
26 | ### Oldrelease and Ubuntu Linux 16.04: 1 NOTE
27 | 
28 | ```
29 |   Author field differs from that derived from Authors@R
30 |     Author:    'Robin van Emden [aut, cre] (<https://orcid.org/0000-0001-5820-8638>), Maurits Kaptein [ctb]       (<https://orcid.org/0000-0002-6316-7524>)'   
31 |     Authors@R: 'Robin van Emden [aut, cre] (0000-0001-5820-8638), Maurits Kaptein [ctb] (0000-0002-6316-7524)'
32 | ```
33 | The only way to get rid of this is by removing the ORCID from the Authors@R comment field - which is processed correctly in R versions later than oldrelease/Ununtu 16.04. Presume this can safely be ignored.
34 | 
35 | ## Downstream dependencies
36 | 
37 | No ERRORs or WARNINGs found 
38 | 


--------------------------------------------------------------------------------
/demo/00Index:
--------------------------------------------------------------------------------
 1 | demo_subsubclass                                  Further subclassing of existing policies and bandits.
 2 | demo_sine_bandit                                  Bandit reward function fluctuating over time.
 3 | demo_offline_cmab_alpha_linucb_direct_method      Offline bandit and parameter evaluation - direct method.
 4 | demo_offline_cmab_alpha_linucb_replay             Offline bandit and parameter evaluation - replay.
 5 | demo_mab_policy_comparison                        Comparison of some contextual-free bandits.
 6 | demo_epsilon_greedy_policy                        Basic simulation of a context-free policy.
 7 | demo_lif_bandit                                   Use of continuum bandit and LiF policy.
 8 | demo_cmab_policy_comparison_linear_bandit         Comparison of a contextual policies with linear bandit.
 9 | demo_cmab_policy_comparison_weight_bandit         Comparison of a contextual policies with weight bandit.
10 | demo_simpsons_paradox_propensity                  Simpson's Paradox to demonstrate propensity weighting.
11 | demo_sutton_barto                                 Contextual code reproducing Sutton & Barto (2018) plots.
12 | demo_bandit_algorithms_for_website_optimization   Contextual code reproducing John Myles White (2012) plots.
13 | demo_epsilon_greedy_to_epoch_greedy_policy        Contextual epsilon epoch and greedy.
14 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/azure/cluster.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "contextual_test_cluster_1",
 3 |   "vmSize": "Standard_F2",
 4 |   "maxTasksPerNode": 2,
 5 |   "poolSize": {
 6 |     "dedicatedNodes": {
 7 |         "min": 2,
 8 |         "max": 2
 9 |     },
10 |     "lowPriorityNodes": {
11 |         "min": 0,
12 |         "max": 0
13 |     },
14 |     "autoscaleFormula": "QUEUE"
15 |   },
16 |   "rPackages": {
17 |     "cran": ["foreach", "data.table", "itertools"],
18 |     "github": ["Nth-iteration-labs/contextual"],
19 |     "githubAuthenticationToken": ""
20 |   },
21 |   "commandLine": []
22 | }
23 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/azure/credentials-sample.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batchAccount": {
 3 |     "name": "batch_account_name_here",
 4 |     "key": "batch_account_key_here",
 5 |     "url": "https://batch_account_name_here.area_here.batch.azure.com"
 6 |   },
 7 |   "storageAccount": {
 8 |     "name": "storate_account_name_here",
 9 |     "key": "storate_account_key_here"
10 |   },
11 |   "githubAuthenticationToken": "githubAuthenticationToken_here"
12 | }
13 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/azure/demo_azure.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(here)
 3 | 
 4 | setwd(here::here("demo","alt_par_backend_examples","azure"))
 5 | 
 6 | source("simulator_azure.R")
 7 | 
 8 | #devtools::install_github("Azure/rAzureBatch")
 9 | #devtools::install_github("Azure/doAzureParallel")
10 | 
11 | ## follow setup and install of doAzureParallel
12 | ## at https://github.com/Azure/doAzureParallel
13 | 
14 | ## sample credentials in the same directory as this file
15 | ## add your credentials and save to the current directory
16 | 
17 | horizon       <- 1000L
18 | simulations   <- 4L
19 | 
20 | bandit        <- ContextualLinearBandit$new(k = 5, d = 5)
21 | 
22 | agents        <-list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit),
23 |                      Agent$new(ContextualLogitBTSPolicy$new(10), bandit),
24 |                      Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit))
25 | 
26 | simulation     <- AzureSimulator$new(agents, horizon, simulations)
27 | 
28 | history        <- simulation$run()
29 | 
30 | plot(history, type = "cumulative", rate = FALSE, legend_position = "topleft")
31 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/azure/simulator_azure.R:
--------------------------------------------------------------------------------
 1 | # AzureSimulator is a subclass of Simulator
 2 | # substituting doParallel with doAzureParallel.
 3 | 
 4 | # devtools::install_github("Azure/rAzureBatch")
 5 | # devtools::install_github("Azure/doAzureParallel")
 6 | 
 7 | library(contextual)
 8 | library(foreach)
 9 | library(doAzureParallel)
10 | library(here)
11 | 
12 | setwd(here::here("demo","alt_par_backend_examples","azure"))
13 | 
14 | AzureSimulator <- R6::R6Class(
15 |   inherit = Simulator,
16 |   public = list(
17 |     register_parallel_backend = function() {
18 | 
19 |       # 1. Generate your credential and cluster configuration files.
20 |       doAzureParallel::generateClusterConfig("cluster.json")
21 |       doAzureParallel::generateCredentialsConfig("credentials.json")
22 | 
23 |       # 2. Fill out your credential config and cluster config files.
24 | 
25 |       # 3. Set your credentials - you need to give the R session your credentials to interact with Azure
26 |       doAzureParallel::setCredentials("credentials.json")
27 | 
28 |       # 4. Register the pool. This will create a new pool if your pool hasn't already been provisioned.
29 |       super$cl <- doAzureParallel::makeCluster("cluster.json")
30 | 
31 |       # 5. Register the pool as your parallel backend
32 |       doAzureParallel::registerDoAzureParallel(super$cl)
33 | 
34 |       # 6. Check that your parallel backend has been registered
35 |       super$workers = foreach::getDoParWorkers()
36 | 
37 |       message(paste0("Azure workers: ", super$workers))
38 |     },
39 |     stop_parallel_backend = function() {
40 |       try({
41 |         doAzureParallel::stopCluster(super$cl)
42 |       })
43 |     }
44 |   )
45 | )
46 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/azure/test_azure_connection.R:
--------------------------------------------------------------------------------
 1 | library(foreach)
 2 | library(doAzureParallel)
 3 | library(here)
 4 | 
 5 | setwd(here::here("demo","alternative_parallel_backends","azure"))
 6 | 
 7 | # 1. Generate your credential and cluster configuration files.
 8 | doAzureParallel::generateClusterConfig("cluster.json")
 9 | doAzureParallel::generateCredentialsConfig("credentials.json")
10 | 
11 | # 2. Fill out your credential config and cluster config files.
12 | #    Enter your Azure Batch Account & Azure Storage keys/account-info into your
13 | #    credential config ("credentials.json") and configure your cluster in your
14 | #    cluster config ("cluster.json")
15 | 
16 | # 3. Set your credentials - you need to give the R session your credentials to
17 | #    interact with Azure
18 | doAzureParallel::setCredentials("credentials.json")
19 | 
20 | # 4. Register the pool. This will create a new pool if your pool hasn't already
21 | #    been provisioned.
22 | cl <- doAzureParallel::makeCluster("cluster.json")
23 | 
24 | # 5. Register the pool as your parallel backend
25 | doAzureParallel::registerDoAzureParallel(cl)
26 | 
27 | # 6. Check that your parallel backend has been registered
28 | workers = foreach::getDoParWorkers()
29 | message(paste0("Workers: ",workers))
30 | 
31 | clusters <- doAzureParallel::getClusterList()
32 | print(clusters)
33 | 
34 | doAzureParallel::stopCluster(cl)
35 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/redis/demo_redis.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(here)
 3 | 
 4 | setwd(here::here("demo","alt_par_backend_examples","redis"))
 5 | 
 6 | source("simulator_redis.R")
 7 | 
 8 | library(contextual)
 9 | 
10 | horizon       <- 1000L
11 | simulations   <- 4L
12 | 
13 | bandit        <- ContextualLinearBandit$new(k = 5, d = 5)
14 | 
15 | agents        <-list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit),
16 |                      Agent$new(ContextualLogitBTSPolicy$new(10), bandit),
17 |                      Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit))
18 | 
19 | simulation     <- RedisSimulator$new(agents, horizon, simulations)
20 | 
21 | history        <- simulation$run()
22 | 
23 | plot(history, type = "cumulative", rate = FALSE, legend_position = "topleft")
24 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/redis/simulator_redis.R:
--------------------------------------------------------------------------------
 1 | # RedisSimulator is a subclass of Simulator
 2 | # substituting doParallel with doRedis.
 3 | 
 4 | # Before running the example, follow instructions at:
 5 | #
 6 | # https://github.com/bwlewis/doRedis
 7 | #
 8 | # Then open one or more R sessions that will act as back-end worker processes.
 9 | # Run the following in each session:
10 | #
11 | # require('doRedis')
12 | # redisWorker('jobs')
13 | 
14 | library(contextual)
15 | library(foreach)
16 | library(doRedis)
17 | 
18 | RedisSimulator <- R6::R6Class(
19 |   inherit = Simulator,
20 |   public = list(
21 |     register_parallel_backend = function() {
22 |       options('redis:num'=TRUE)
23 |       doRedis::registerDoRedis('jobs')
24 |       super$workers = foreach::getDoParWorkers()
25 |     },
26 |     stop_parallel_backend = function() {
27 |       try({
28 |         doRedis::removeQueue('jobs')
29 |       })
30 |     }
31 |   )
32 | )
33 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/rmpi/demo_rmpi.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(here)
 3 | 
 4 | setwd(here::here("demo","alt_par_backend_examples","rmpi"))
 5 | 
 6 | source("simulator_rmpi.R")
 7 | 
 8 | library(contextual)
 9 | 
10 | horizon       <- 1000L
11 | simulations   <- 4L
12 | 
13 | bandit        <- ContextualLinearBandit$new(k = 5, d = 5)
14 | 
15 | agents        <-list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit),
16 |                      Agent$new(ContextualLogitBTSPolicy$new(10), bandit),
17 |                      Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit))
18 | 
19 | simulation     <- MPISimulator$new(agents, horizon, simulations)
20 | 
21 | history        <- simulation$run()
22 | 
23 | plot(history, type = "cumulative", rate = FALSE, legend_position = "topleft")
24 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/rmpi/simulator_rmpi.R:
--------------------------------------------------------------------------------
 1 | # MPISimulator is a subclass of Simulator
 2 | # substituting doParallel with doMPI.
 3 | 
 4 | library(contextual)
 5 | library(foreach)
 6 | library(Rmpi)
 7 | library(doMPI)
 8 | 
 9 | MPISimulator <- R6::R6Class(
10 |   inherit = Simulator,
11 |   public = list(
12 |     register_parallel_backend = function() {
13 |       super$cl <- doMPI::startMPIcluster()
14 |       doMPI::registerDoMPI(super$cl)
15 |       super$workers = foreach::getDoParWorkers()
16 |       message(paste0("MPI workers: ", super$workers))
17 |     },
18 |     stop_parallel_backend = function() {
19 |       try({
20 |         doMPI::closeCluster(super$cl)
21 |       })
22 |     }
23 |   )
24 | )
25 | 


--------------------------------------------------------------------------------
/demo/alternative_parallel_backends/rmpi/test_rmpi_connection.R:
--------------------------------------------------------------------------------
 1 | library(foreach)
 2 | library(Rmpi)
 3 | 
 4 | # Instructions for installing Rmpi: http://fisher.stats.uwo.ca/faculty/yu/Rmpi/
 5 | 
 6 | mpi.spawn.Rslaves()
 7 | Sys.sleep(3)
 8 | 
 9 | mpi.setup.rngstream(iseed=123)
10 | mpi.parReplicate(80, mean(rnorm(1000000)))
11 | 
12 | mpi.close.Rslaves()
13 | 


--------------------------------------------------------------------------------
/demo/demo_cmab_policy_comparison_linear_bandit.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | horizon       <- 400L
 4 | simulations   <- 300L
 5 | 
 6 | bandit        <- ContextualLinearBandit$new(k = 5, d = 5, sigma = 0.1)
 7 | 
 8 | agents <- list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit, "EGreedy"),
 9 |                Agent$new(ContextualEpsilonGreedyPolicy$new(0.1), bandit, "cEGreedy"),
10 |                Agent$new(ContextualLinTSPolicy$new(0.01), bandit, "LinTS"),
11 |                Agent$new(LinUCBDisjointOptimizedPolicy$new(1), bandit, "LinUCB"))
12 | 
13 | simulation     <- Simulator$new(agents, horizon, simulations)
14 | 
15 | history        <- simulation$run()
16 | 
17 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "topleft")
18 | 


--------------------------------------------------------------------------------
/demo/demo_cmab_policy_comparison_weight_bandit.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | horizon     <- 1000L
 4 | simulations <- 100L
 5 | 
 6 | weights     <- matrix(c(0.8, 0.1, 0.1,
 7 |                         0.1, 0.8, 0.1,
 8 |                         0.1, 0.1, 0.8), nrow = 3, ncol = 3, byrow = TRUE)
 9 | 
10 | bandit      <- ContextualBinaryBandit$new(weights = weights)
11 | agents      <- list(Agent$new(ContextualTSProbitPolicy$new(draws = 100), bandit, "TSProbit"),
12 |                     Agent$new(ContextualEpsilonGreedyPolicy$new(0.1), bandit, "cEGreedy"),
13 |                     Agent$new(ContextualLogitBTSPolicy$new(draws = 100), bandit, "LogitBTS"),
14 |                     Agent$new(LinUCBDisjointPolicy$new(0.6), bandit, "LinUCB"))
15 | 
16 | simulation  <- Simulator$new(agents, horizon, simulations)
17 | history     <- simulation$run()
18 | 
19 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE, legend_position = "topleft")
20 | 


--------------------------------------------------------------------------------
/demo/demo_epsilon_greedy_policy.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | policy             <- EpsilonGreedyPolicy$new(epsilon = 0.1)
 4 | 
 5 | bandit             <- BasicBernoulliBandit$new(weights = c(0.6, 0.1, 0.1))
 6 | 
 7 | agent              <- Agent$new(policy,bandit)
 8 | 
 9 | simulator          <- Simulator$new(agents      = agent,
10 |                                     horizon     = 100,
11 |                                     simulations = 1000)
12 | simulator$run()
13 | 
14 | plot(simulator$history, type = "cumulative", regret = TRUE, disp = "ci",
15 |                         traces = TRUE, traces_max = 100, traces_alpha = 0.1)
16 | 
17 | summary(simulator$history)
18 | 
19 | sim_data          <- simulator$history$get_data_table()
20 | sim_cum_data      <- simulator$history$get_cumulative_data()
21 | 


--------------------------------------------------------------------------------
/demo/demo_lif_bandit.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | horizon            <- 1000
 4 | simulations        <- 100
 5 | 
 6 | continuous_arms    <- function(x) {
 7 |   -0.1*(x - 5) ^ 2 + 3.5  + rnorm(length(x),0,0.4)
 8 | }
 9 | 
10 | int_time           <- 100
11 | amplitude          <- 0.2
12 | learn_rate         <- 0.3
13 | omega              <- 2*pi/int_time
14 | x0_start           <- 2.0
15 | 
16 | policy             <- LifPolicy$new(int_time, amplitude, learn_rate, omega, x0_start)
17 | 
18 | bandit             <- ContinuumBandit$new(FUN = continuous_arms)
19 | 
20 | agent              <- Agent$new(policy,bandit)
21 | 
22 | history            <- Simulator$new(agents      = agent,
23 |                                     horizon     = horizon,
24 |                                     simulations = simulations)$run()
25 | 
26 | plot(history, type = "average", regret = FALSE)
27 | 


--------------------------------------------------------------------------------
/demo/demo_mab_policy_comparison.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | prob_per_arm       <- c(0.5, 0.3, 0.1)
 4 | horizon            <- 150
 5 | simulations        <- 2000
 6 | 
 7 | bandit             <- BasicBernoulliBandit$new(prob_per_arm)
 8 | 
 9 | agents             <- list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit),
10 |                            Agent$new(ThompsonSamplingPolicy$new(1, 1), bandit),
11 |                            Agent$new(Exp3Policy$new(0.1), bandit),
12 |                            Agent$new(GittinsBrezziLaiPolicy$new(), bandit),
13 |                            Agent$new(UCB1Policy$new(), bandit),
14 |                            Agent$new(UCB2Policy$new(0.1), bandit))
15 | 
16 | simulation         <- Simulator$new(agents, horizon, simulations)
17 | history            <- simulation$run()
18 | 
19 | plot(history, type = "cumulative")
20 | 
21 | summary(history)
22 | 


--------------------------------------------------------------------------------
/demo/demo_offline_cmab_alpha_linucb_direct_method.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(data.table)
 3 | library(Formula)
 4 | 
 5 | # Import personalization data-set
 6 | 
 7 | data         <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/data.txt")
 8 |                                          # 0/1 reward, 10 arms, 100 features
 9 |                                          # arms always start from 1
10 | 
11 | #      z y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15  .. x100
12 | #   1: 2 0  5  0  0 37  6  0  0  0  0  25   0   0   7   1   0  ..    0
13 | #   2: 8 0  1  3 36  0  0  0  0  0  0   0   0   1   0   0   0  ..   10
14 | #   3: . .  .  .  .  .  .  .  .  .  .   .   .   .   .   .   .  ..    .
15 | 
16 | simulations <- 1
17 | horizon     <- nrow(data)
18 | 
19 | # Run regression per arm, predict outcomes, and save results, a column per arm
20 | 
21 | x                <- reformulate(names(data)[3:102],response="y")     # x: x1 .. x100
22 | f                <- Formula::as.Formula(x)                           # y ~ x
23 | 
24 | model_f          <- function(arm) glm(f, data=data[z==arm], family=binomial(link="logit"), y=F, model=F)
25 | arms             <- sort(unique(data$z))
26 | model_arms       <- lapply(arms, FUN = model_f)
27 | 
28 | predict_arm      <- function(model) predict(model, data, type = "response")
29 | r_data           <- lapply(model_arms, FUN = predict_arm)
30 | r_data           <- do.call(cbind, r_data)
31 | colnames(r_data) <- paste0("r", (1:max(arms)))
32 | 
33 | # Bind data and model predictions
34 | 
35 | data             <- cbind(data,r_data)
36 | 
37 | # Run direct method style offline bandit
38 | 
39 | x                <- reformulate(names(data)[3:102], response="y")
40 | z                <- ~ z
41 | r                <- ~ r1 + r2 + r3 + r4 + r5 + r6 + r7 + r8 + r9 + r10
42 | 
43 | f                <- as.Formula(z,x,r)    # Resulting in: y ~ z | x1 + x2 .. | r1 + r2 + ..
44 | 
45 | bandit           <- OfflineDirectMethodBandit$new(formula = f, data = data)
46 | 
47 | # Define agents.
48 | agents      <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.01), bandit, "alpha = 0.01"),
49 |                     Agent$new(LinUCBDisjointOptimizedPolicy$new(0.05), bandit, "alpha = 0.05"),
50 |                     Agent$new(LinUCBDisjointOptimizedPolicy$new(0.1),  bandit, "alpha = 0.1"),
51 |                     Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0),  bandit, "alpha = 1.0"))
52 | 
53 | # Initialize the simulation.
54 | 
55 | simulation  <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon)
56 | 
57 | # Run the simulation.
58 | sim  <- simulation$run()
59 | 
60 | # plot the results
61 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright", ylim = c(0,1))
62 | 


--------------------------------------------------------------------------------
/demo/demo_offline_cmab_alpha_linucb_replay.R:
--------------------------------------------------------------------------------
 1 | library(contextual); library(data.table)
 2 | 
 3 | dt      <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/data.txt")
 4 |                                     # 0/1 reward, 10 arms, 100 features
 5 |                                     # arms always start from 1
 6 | 
 7 | #      z y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15  .. x100
 8 | #   1: 2 0  5  0  0 37  6  0  0  0  0  25   0   0   7   1   0  ..    0
 9 | #   2: 8 0  1  3 36  0  0  0  0  0  0   0   0   1   0   0   0  ..   10
10 | #   3: . .  .  .  .  .  .  .  .  .  .   .   .   .   .   .   .  ..    .
11 | 
12 | # Set up formula:       y      ~ z    | x1 + x2 + ..
13 | # In bandit parlance:   reward ~ arms | covariates or contextual features
14 | 
15 | f       <- y ~ z | . - z
16 | 
17 | # Instantiate Replay Bandit (Li, 2010)
18 | bandit  <- OfflineReplayEvaluatorBandit$new(formula = f, data = dt)
19 | 
20 | # Bind Policies withs Bandits through Agents, add Agents to list
21 | agents  <- list(
22 |   Agent$new(UCB2Policy$new(0.01),           bandit, "UCB2   alpha = 0.01"),
23 |   Agent$new(LinUCBDisjointPolicy$new(0.01), bandit, "LinUCB alpha = 0.01"),
24 |   Agent$new(LinUCBDisjointPolicy$new(0.1),  bandit, "LinUCB alpha = 0.1"))
25 | 
26 | # Instantiate and run a Simulator, plot the resulting History object
27 | history <- Simulator$new(agents, horizon = nrow(dt), simulations = 5)$run()
28 | plot(history, type = "cumulative", regret = FALSE, legend_border = FALSE)
29 | 


--------------------------------------------------------------------------------
/demo/demo_sine_bandit.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | # Based on a section of Dai Shi's thesis
 4 | # "Exploring Bandit Algorithms for Automatic Content Selection"
 5 | 
 6 | horizon            <- 600L
 7 | simulations        <- 300L
 8 | 
 9 | TwoArmedSineBandit <- R6::R6Class(
10 |   inherit = Bandit,
11 |   class = FALSE,
12 |   public = list(
13 |     sigma = NULL,
14 |     class_name = "TwoArmedSineBandit",
15 |     initialize  = function(k = 2, sigma = 0.2) {
16 |       self$k = k
17 |       self$sigma = sigma
18 |     },
19 |     get_context = function(t) {
20 |       context     <- list(k = self$k)
21 |     },
22 |     get_reward = function(t, context, action) {
23 |       rseq        <- seq(0,2, by = 2/self$k)[-(self$k+1)]
24 |       sine        <- sapply(rseq,self$sine,t)
25 |       reward      <- sine + rnorm(1, sd = self$sigma)
26 |       reward      <- list(
27 |         reward                   = reward[action$choice],
28 |         optimal_reward           = sine[which_max_tied(sine)],
29 |         optimal_arm              = contextual::which_max_tied(sine)
30 |       )
31 |     },
32 |     sine = function(phi, t) {
33 |       omega       <- 0.125; A <- 0.5; p <- 1.0;
34 |       A * (sin(omega * pi * t /10 + phi * pi) + p)
35 |     }
36 |   )
37 | )
38 | 
39 | bandit             <- TwoArmedSineBandit$new()
40 | 
41 | agents             <- list(Agent$new(Exp3Policy$new(0.1), bandit),
42 |                            Agent$new(UCB1Policy$new(), bandit))
43 | 
44 | simulation         <- Simulator$new(agents, horizon = horizon, simulations = simulations, do_parallel = TRUE)
45 | 
46 | history            <- simulation$run()
47 | 
48 | plot(history, type = "average", regret = FALSE, disp = "var", plot_only_disp = TRUE)
49 | plot(history, type = "average", regret = TRUE, disp = "var", plot_only_disp = TRUE)
50 | plot(history, type = "cumulative", disp = "var", rate = TRUE, plot_only_disp = TRUE)
51 | plot(history, type = "average", regret = FALSE, disp = "var")
52 | plot(history, type = "average", regret = TRUE, disp = "var")
53 | plot(history, type = "cumulative", disp = "var", rate = TRUE)
54 | 


--------------------------------------------------------------------------------
/demo/demo_subsubclass.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | BasicPoissonBandit <- R6::R6Class(
 4 |   inherit = BasicBernoulliBandit,
 5 |   class = FALSE,
 6 |   public = list(
 7 |     weights = NULL,
 8 |     class_name = "BasicPoissonBandit",
 9 |     # Override get_reward & generate Poisson based rewards
10 |     get_reward = function(t, context, action) {
11 |       reward_means = rep(2,self$k)
12 |       rpm <- rpois(self$k, reward_means)
13 |       rewards <- matrix(rpm < self$weights, self$k, 1)*1
14 |       optimal_arm    <- which_max_tied(self$weights)
15 |       reward         <- list(
16 |         reward                   = rewards[action$choice],
17 |         optimal_arm              = optimal_arm,
18 |         optimal_reward           = rewards[optimal_arm]
19 |       )
20 |     }
21 |   )
22 | )
23 | 
24 | EpsilonGreedyAnnealingPolicy <- R6::R6Class(
25 |   # Class extends EpsilonGreedyPolicy
26 |   inherit = EpsilonGreedyPolicy,
27 |   portable = FALSE,
28 |   public = list(
29 |     class_name = "EpsilonGreedyAnnealingPolicy",
30 |     # Override EpsilonGreedyPolicy's get_action, use annealing epsilon
31 |     get_action = function(t, context) {
32 |       self$epsilon <- 1/(log(100*t+0.001))
33 |       super$get_action(t, context)
34 |     }
35 |   )
36 | )
37 | 
38 | weights <- c(7,1,2)
39 | horizon <- 200
40 | simulations <- 1000
41 | bandit <- BasicPoissonBandit$new(weights)
42 | ega_policy <- EpsilonGreedyAnnealingPolicy$new()
43 | eg_policy  <- EpsilonGreedyPolicy$new(0.2)
44 | agents <- list(Agent$new(ega_policy, bandit, "EG Annealing"),
45 |                Agent$new(eg_policy, bandit, "EG"))
46 | simulation <- Simulator$new(agents, horizon, simulations, do_parallel = TRUE)
47 | history <- simulation$run()
48 | 
49 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE,
50 |      legend_position = "bottomright")
51 | 


--------------------------------------------------------------------------------
/demo/evaluations_on_public_datasets/demo_carskit_depaul.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(data.table)
 3 | 
 4 | # Import personalization data-set
 5 | 
 6 | # Info: https://d1ie9wlkzugsxr.cloudfront.net/data_irecsys_CARSKit/Movie_DePaulMovie/README.txt
 7 | 
 8 | url         <- "http://d1ie9wlkzugsxr.cloudfront.net/data_irecsys_CARSKit/Movie_DePaulMovie/ratings.csv"
 9 | data        <- fread(url, stringsAsFactors=TRUE)
10 | 
11 | # Convert data
12 | 
13 | data        <- contextual::one_hot(data, cols = c("Time","Location","Companion"), sparsifyNAs = TRUE)
14 | data[, itemid := as.numeric(itemid)]
15 | data[, rating := ifelse(rating <= 3, 0, 1)]
16 | 
17 | # Set simulation parameters.
18 | simulations <- 10  # here, "simulations" represents the number of boostrap samples
19 | horizon     <- nrow(data)
20 | 
21 | # Initiate Replay bandit with 10 arms and 100 context dimensions
22 | log_S       <- data
23 | formula     <- formula("rating ~ itemid | Time_Weekday + Time_Weekend + Location_Cinema + Location_Home +
24 |                                           Companion_Alone + Companion_Family + Companion_Partner")
25 | bandit      <- OfflineBootstrappedReplayBandit$new(formula = formula, data = data)
26 | 
27 | # Define agents.
28 | agents      <-
29 |   list(Agent$new(RandomPolicy$new(), bandit, "Random"),
30 |        Agent$new(EpsilonGreedyPolicy$new(0.03), bandit, "EGreedy 0.05"),
31 |        Agent$new(ThompsonSamplingPolicy$new(), bandit, "ThompsonSampling"),
32 |        Agent$new(LinUCBDisjointOptimizedPolicy$new(0.37), bandit, "LinUCB 0.37"))
33 | 
34 | # Initialize the simulation.
35 | simulation  <-
36 |   Simulator$new(
37 |     agents           = agents,
38 |     simulations      = simulations,
39 |     horizon          = horizon
40 |   )
41 | 
42 | # Run the simulation.
43 | # Takes about 5 minutes: bootstrapbandit loops for arms x horizon x simulations (times nr of agents).
44 | sim  <- simulation$run()
45 | 
46 | # plot the results
47 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE,
48 |          legend_position = "topleft", ylim=c(0.48,0.87))
49 | 


--------------------------------------------------------------------------------
/demo/offline_bandit_evaluations/demo_offline_bootstrap_replay.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(data.table)
 3 | 
 4 | # Import myocardial infection dataset
 5 | 
 6 | url  <- "http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv"
 7 | data            <- fread(url)
 8 | 
 9 | simulations     <- 1000
10 | horizon         <- nrow(data)
11 | 
12 | # arms always start at 1
13 | data$trt        <- data$trt + 1
14 | 
15 | # turn death into alive, making it a reward
16 | data$alive      <- abs(data$death - 1)
17 | 
18 | # run bandit - when leaving out p, Propensity Bandit uses marginal prob per arm for propensities:
19 | # table(private$z)/length(private$z)
20 | 
21 | f          <- alive ~ trt | age + risk + severity
22 | 
23 | bandit     <- OfflineBootstrappedReplayBandit$new(formula = f, data = data)
24 | 
25 | # Define agents.
26 | agents      <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB"))
27 | 
28 | # Initialize the simulation.
29 | 
30 | simulation  <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon)
31 | 
32 | # Run the simulation.
33 | sim  <- simulation$run()
34 | 
35 | # plot the results
36 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright")
37 | 
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/demo/offline_bandit_evaluations/demo_offline_direct_method.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(data.table)
 3 | 
 4 | # Import myocardial infection dataset
 5 | 
 6 | url             <- "http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv"
 7 | data            <- fread(url)
 8 | 
 9 | simulations     <- 1000
10 | horizon         <- nrow(data)
11 | 
12 | # arms always start at 1
13 | data$trt        <- data$trt + 1
14 | 
15 | # turn death into alive, making it a reward
16 | data$alive      <- abs(data$death - 1)
17 | 
18 | # Run regression per arm, predict outcomes, and save results, a column per arm
19 | 
20 | f                <- alive ~ age + risk + severity
21 | 
22 | model_f          <- function(arm) glm(f, data=data[trt==arm], family=binomial(link="logit"), y=F, model=F)
23 | arms             <- sort(unique(data$trt))
24 | model_arms       <- lapply(arms, FUN = model_f)
25 | 
26 | predict_arm      <- function(model) predict(model, data, type = "response")
27 | r_data           <- lapply(model_arms, FUN = predict_arm)
28 | r_data           <- do.call(cbind, r_data)
29 | colnames(r_data) <- paste0("R", (1:max(arms)))
30 | 
31 | # Bind data and model predictions
32 | 
33 | data             <- cbind(data,r_data)
34 | 
35 | # Define Bandit
36 | 
37 | f                <- alive ~ trt | age + risk + severity | R1 + R2
38 | 
39 | bandit           <- OfflineDirectMethodBandit$new(formula = f, data = data)
40 | 
41 | # Define agents.
42 | agents      <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB"))
43 | 
44 | # Initialize the simulation.
45 | 
46 | simulation  <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon)
47 | 
48 | # Run the simulation.
49 | sim  <- simulation$run()
50 | 
51 | # plot the results
52 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright")
53 | 
54 | 


--------------------------------------------------------------------------------
/demo/offline_bandit_evaluations/demo_offline_doubly_robust.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(data.table)
 3 | 
 4 | # Import myocardial infection dataset
 5 | data  <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv")
 6 | 
 7 | simulations     <- 1000
 8 | horizon         <- nrow(data)
 9 | 
10 | # arms always start at 1
11 | data$trt        <- data$trt + 1
12 | 
13 | # turn death into alive, making it a reward
14 | data$alive      <- abs(data$death - 1)
15 | 
16 | # Run regression per arm, predict outcomes, and save results, a column per arm
17 | f                <- alive ~ age + risk + severity
18 | model_f          <- function(arm) glm(f, data=data[trt==arm],
19 |                                       family=binomial(link="logit"),
20 |                                       y=FALSE, model=FALSE)
21 | arms             <- sort(unique(data$trt))
22 | model_arms       <- lapply(arms, FUN = model_f)
23 | 
24 | predict_arm      <- function(model) predict(model, data, type = "response")
25 | r_data           <- lapply(model_arms, FUN = predict_arm)
26 | r_data           <- do.call(cbind, r_data)
27 | colnames(r_data) <- paste0("r", (1:max(arms)))
28 | 
29 | # Bind data and model predictions
30 | data       <- cbind(data,r_data)
31 | 
32 | # calculate propensity weights
33 | m          <- glm(I(trt-1) ~ age + risk + severity,
34 |                   data=data, family=binomial(link="logit"))
35 | data$p     <- predict(m, type = "response")
36 | 
37 | # formula notation of dataset:
38 | # (without p, doublyrobustbandit uses marginal prob per arm for propensities)
39 | f           <- alive ~ trt | age + risk + severity | r1 + r2  | p
40 | 
41 | bandit      <- OfflineDoublyRobustBandit$new(formula = f, data = data)
42 | agents      <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB"))
43 | simulation  <- Simulator$new(agents, horizon, simulations)
44 | sim  <- simulation$run()
45 | # plot the results
46 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright")
47 | 


--------------------------------------------------------------------------------
/demo/offline_bandit_evaluations/demo_offline_propensity_score.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(data.table)
 3 | 
 4 | # Import myocardial infection dataset
 5 | 
 6 | url  <- "http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv"
 7 | data            <- fread(url)
 8 | 
 9 | simulations     <- 1000
10 | horizon         <- nrow(data)
11 | 
12 | # arms always start at 1
13 | data$trt        <- data$trt + 1
14 | 
15 | # turn death into alive, making it a reward
16 | data$alive      <- abs(data$death - 1)
17 | 
18 | # calculate propensity weights
19 | 
20 | m      <- glm(I(trt-1) ~ age + risk + severity, data=data, family=binomial(link="logit"))
21 | data$p <- predict(m, type = "response")
22 | 
23 | # run bandit - when leaving out p, Propensity Bandit uses marginal prob per arm for propensities:
24 | # table(private$z)/length(private$z)
25 | 
26 | f          <- alive ~ trt | age + risk + severity | p
27 | 
28 | bandit     <- OfflinePropensityWeightingBandit$new(formula = f, data = data)
29 | 
30 | # Define agents.
31 | agents      <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB"))
32 | 
33 | # Initialize the simulation.
34 | 
35 | simulation  <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon)
36 | 
37 | # Run the simulation.
38 | sim  <- simulation$run()
39 | 
40 | # plot the results
41 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright")
42 | 
43 | 


--------------------------------------------------------------------------------
/demo/replication_eckles_kaptein_2014/demo_bootstrap_fig_2.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | # Replication of THOMPSON SAMPLING WITH THE ONLINE BOOTSTRAP By Dean Eckles and Maurits Kaptein
 4 | 
 5 | # https://arxiv.org/abs/1410.4009
 6 | 
 7 | # Fig 2. Empirical regret for Thompson sampling and BTS in a K-armed binomial bandit problem with
 8 | # varied differences between the optimal arm and all others.
 9 | 
10 | bandit             <- BasicBernoulliBandit$new(weights = c(0.5, rep(0.4,9)))
11 | 
12 | agents             <- list(Agent$new(BootstrapTSPolicy$new(1000), bandit, "BTS 1000"),
13 |                            Agent$new(ThompsonSamplingPolicy$new(), bandit, "TS"))
14 | 
15 | simulator          <- Simulator$new(agents        = agents,
16 |                                     do_parallel   = TRUE,
17 |                                     save_interval = 50,
18 |                                     set_seed      = 999,
19 |                                     horizon       = 1e+05,
20 |                                     simulations   = 1000)
21 | 
22 | simulator$run()
23 | 
24 | plot(simulator$history, log = "x")
25 | 


--------------------------------------------------------------------------------
/demo/replication_eckles_kaptein_2014/demo_bootstrap_fig_3.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | # Replication of THOMPSON SAMPLING WITH THE ONLINE BOOTSTRAP By Dean Eckles and Maurits Kaptein
 4 | 
 5 | # https://arxiv.org/abs/1410.4009
 6 | 
 7 | # Fig 3: Comparison of empirical regret for BTS with varied number of bootstrap replicates.
 8 | 
 9 | # Sim completes within an hour on a 12 core server.
10 | 
11 | bandit             <- BasicBernoulliBandit$new(weights = c(0.5, rep(0.4,9)))
12 | 
13 | agents             <- list(Agent$new(BootstrapTSPolicy$new(10), bandit, "BTS 10"),
14 |                            Agent$new(BootstrapTSPolicy$new(100), bandit, "BTS 100"),
15 |                            Agent$new(BootstrapTSPolicy$new(1000), bandit, "BTS 1000"),
16 |                            Agent$new(BootstrapTSPolicy$new(10000), bandit, "BTS 10000"))
17 | 
18 | simulator          <- Simulator$new(agents        = agents,
19 |                                     do_parallel   = TRUE,
20 |                                     save_interval = 50,
21 |                                     horizon       = 1e+05,
22 |                                     simulations   = 1000)
23 | 
24 | simulator$run()
25 | 
26 | plot(simulator$history, log = "x")
27 | 


--------------------------------------------------------------------------------
/demo/replication_kruijswijk_2018/README.md:
--------------------------------------------------------------------------------
1 | # Bandits with dependent observations
2 | 
3 | Code for replication plots of the paper "Exploiting Nested Data Structures in Multi-Armed Bandits" (submitted to PLOS One).
4 | 
5 | Run file 2a and 2b to generate the plots for the simulation study. Do note that running these can take quite a while - especially for the partial pooling version for Thompson sampling - so use with care.
6 | 
7 | Run file 3 to generate the plots for the empirical study using the supplied .csv file.
8 | 


--------------------------------------------------------------------------------
/demo/replication_kruijswijk_2018/bandit_bootstrapped_replay.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | DependentObservationsBootstrappedBandit <- R6::R6Class(
 3 |   inherit = Bandit,
 4 |   class = FALSE,
 5 |   private = list(
 6 |     S = NULL,
 7 |     x = NULL,
 8 |     rows = NULL
 9 |   ),
10 |   public = list(
11 |     class_name = "DependentObservationsBootstrappedBandit",
12 |     arm_multiply = NULL,
13 |     initialize   = function(offline_data, arms) {
14 |       self$k <- arms
15 |       self$d <- 1
16 |       private$S <- offline_data
17 |       if(!"context" %in% colnames(private$S)) private$S$context = list(1)
18 |       private$S[is.null(context[[1]]),`:=`(context = list(1))]
19 |       self$arm_multiply <- TRUE
20 |       private$S <- do.call("rbind", replicate(self$k, private$S, simplify = FALSE))
21 |       private$rows <- nrow(private$S)  # <- daar
22 |     },
23 |     post_initialization = function() {
24 |       private$S <- private$S[sample(nrow(private$S),replace=TRUE)]
25 |       private$x <- as.matrix(private$S$context)
26 |       private$x <- apply(private$x, 2, jitter)
27 |     },
28 |     get_context = function(index) {
29 |       print(index)
30 |       if(index > private$rows) return(NULL)   # <- en daar ...
31 |       context <- list(
32 |         k      = self$k,
33 |         d      = self$d,
34 |         user_context = private$S$user[[index]],
35 |         X      = private$x[[index]]
36 |       )
37 |       context
38 |     },
39 |     get_reward = function(index, context, action) {
40 |       reward_at_index <- as.double(private$S$reward[[index]])
41 |       if (private$S$choice[[index]] == action$choice) {
42 |         list(
43 |           reward = reward_at_index
44 |         )
45 |       } else {
46 |         NULL
47 |       }
48 |     }
49 |   )
50 | )
51 | 


--------------------------------------------------------------------------------
/demo/replication_kruijswijk_2018/bandit_replay.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | DependentObservationsReplayBandit <- R6::R6Class(
 3 |   inherit = Bandit,
 4 |   class = FALSE,
 5 |   private = list(
 6 |     S = NULL
 7 |   ),
 8 |   public = list(
 9 |     class_name = "DependentObservationsReplayBandit",
10 |     initialize   = function(offline_data, arms) {
11 |       self$k <- arms
12 |       self$d <- 1
13 |       private$S <- offline_data
14 |       if(!"context" %in% colnames(private$S)) private$S$context = list(1)
15 |       private$S[is.null(context[[1]]),`:=`(context = list(1))]
16 |     },
17 |     post_initialization = function() {
18 |       private$S <- private$S[sample(nrow(private$S))]
19 |     },
20 |     get_context = function(index) {
21 |       context <- list(
22 |         k      = self$k,
23 |         d      = self$d,
24 |         user_context = private$S$user[[index]],
25 |         X      = private$S$context[[index]]
26 |       )
27 |       context
28 |     },
29 |     get_reward = function(index, context, action) {
30 |       reward_at_index <- as.double(private$S$reward[[index]])
31 |       if (private$S$choice[[index]] == action$choice) {
32 |         list(
33 |           reward = reward_at_index
34 |         )
35 |       } else {
36 |         NULL
37 |       }
38 |     }
39 |   )
40 | )
41 | 


--------------------------------------------------------------------------------
/demo/replication_kruijswijk_2018/beta_binom_hier_model.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |   int<lower=0> n_subjects;                          // items/subjects
 3 |   int<lower=0> n[n_subjects];                       // total trials
 4 |   int<lower=0> l[n_subjects];                       // total successes
 5 | }
 6 | 
 7 | parameters {
 8 |   real<lower=0, upper=1> phi;                       // population chance of success
 9 |   real<lower=1> kappa;                              // population concentration
10 |   vector<lower=0, upper=1>[n_subjects] theta;       // chance of success
11 | }
12 | 
13 | model {
14 |   kappa ~ pareto(1, 1.5);                           // hyperprior
15 |   theta ~ beta(phi * kappa, (1 - phi) * kappa);     // prior
16 |   l ~ binomial(n, theta);                           // likelihood
17 | }
18 | 


--------------------------------------------------------------------------------
/demo/replication_kruijswijk_2019/README.md:
--------------------------------------------------------------------------------
1 | # offline-parameter-tuning
2 | Code for the offline paramater tuning paper (submitted to IDA 2020)
3 | 
4 | For the replications of the plots, see demo_lif_bandit.R and demo_tbl_bandit.R
5 | 


--------------------------------------------------------------------------------
/demo/replication_kruijswijk_2019/bandit_continuum_offon.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | OnlineOfflineContinuumBandit <- R6::R6Class(
 3 |   inherit = Bandit,
 4 |   class = FALSE,
 5 |   private = list(
 6 |     S = NULL
 7 |   ),
 8 |   public = list(
 9 |     class_name = "OnlineOfflineContinuumBandit",
10 |     delta = NULL,
11 |     horizon = NULL,
12 |     choice = NULL,
13 |     arm_function = NULL,
14 |     initialize   = function(FUN, delta, horizon) {
15 |       self$arm_function <- FUN
16 |       self$horizon <- horizon
17 |       self$delta <- delta
18 |       self$k <- 1
19 |     },
20 |     post_initialization = function() {
21 |       self$choice <- runif(self$horizon, min=0, max=1)
22 |       private$S <- data.frame(self$choice, self$arm_function(self$choice))
23 |       private$S <- private$S[sample(nrow(private$S)),]
24 |       colnames(private$S) <- c('choice', 'reward')
25 |     },
26 |     get_context = function(index) {
27 |       context           <- list()
28 |       context$k         <- self$k
29 |       context
30 |     },
31 |     get_reward = function(index, context, action) {
32 |       reward_at_index <- as.double(private$S$reward[[index]])
33 |       if (abs(private$S$choice[[index]] - action$choice) < self$delta) {
34 |         reward <- list(
35 |           reward = reward_at_index
36 |         )
37 |       } else {
38 |         NULL
39 |       }
40 |     }
41 |   )
42 | )
43 | 


--------------------------------------------------------------------------------
/demo/replication_kruijswijk_2019/bandit_continuum_offon_kern.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | OnlineOfflineContinuumBanditKernel <- R6::R6Class(
 3 |   inherit = Bandit,
 4 |   class = FALSE,
 5 |   private = list(
 6 |     S = NULL,
 7 |     n = NULL
 8 |   ),
 9 |   public = list(
10 |     class_name = "OnlineOfflineContinuumBanditKernel",
11 |     delta = NULL,
12 |     c1 = NULL,
13 |     c2 = NULL,
14 |     arm_function = NULL,
15 |     choice = NULL,
16 |     h = NULL,
17 |     kernel = NULL,
18 |     horizon = NULL,
19 |     initialize   = function(FUN, horizon) {
20 |       self$arm_function <- FUN
21 |       self$k <- 1
22 |       self$horizon <- horizon
23 |       self$h <- horizon^(-1/5)
24 |       self$kernel <- function(action_true, action_choice, bandwith){ 1/sqrt(2*pi)*exp(((action_choice - action_true) / bandwith)^2/2) }
25 |     },
26 |     post_initialization = function() {
27 |       self$choice <- runif(self$horizon, min=0, max=1)
28 |       private$S <- data.frame(self$choice, self$arm_function(self$choice))
29 |       private$S <- private$S[sample(nrow(private$S)),]
30 |       colnames(private$S) <- c('choice', 'reward')
31 |       private$n <- 0
32 |     },
33 |     get_context = function(index) {
34 |       context           <- list()
35 |       context$k         <- self$k
36 |       context
37 |     },
38 |     get_reward = function(index, context, action) {
39 |       reward_at_index <- as.double(private$S$reward[[index]])
40 |       #kern_value <- self$kernel(action_true = private$S$choice[[index]], action_choice = action$choice, bandwith = self$h)
41 |       temp_u <- (action$choice - private$S$choice[[index]]) / self$h
42 |       kern_value <- 1/sqrt(2*pi) * exp(-temp_u^2 / 2)
43 |       #inc(private$n) <- 1
44 |       #print(paste0("Kernel value: ", kern_value, "action choice: ", action$choice, "true action: ", private$S$choice[[index]], "divy: ", temp_u))
45 |       reward <- list(
46 |         reward = (kern_value * reward_at_index),
47 |         optimal_reward = self$c2
48 |       )
49 |     }
50 |   )
51 | )
52 | 


--------------------------------------------------------------------------------
/demo/replication_kruijswijk_2019/policy_tbl.R:
--------------------------------------------------------------------------------
 1 | ThompsonBayesianLinearPolicy <- R6::R6Class(
 2 |   portable = FALSE,
 3 |   class = FALSE,
 4 |   inherit = Policy,
 5 |   public = list(
 6 |     class_name = "ThompsonBayesianLinearPolicy",
 7 |     J = NULL,
 8 |     P = NULL,
 9 |     err = NULL,
10 |     initialize = function(J = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
11 |                           P = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
12 |                           err=1) {
13 |       super$initialize()
14 |       self$J <- J
15 |       self$P <- P
16 |       self$err <- err
17 |     },
18 |     set_parameters = function(context_params) {
19 |       self$theta <- list('J' = self$J, 'P' = self$P, 'err' = self$err)
20 |     },
21 |     get_action = function(t, context) {
22 |       sigma <- solve(self$theta$P, tol = 1e-200)
23 |       mu <- sigma %*% matrix(self$theta$J)
24 |       betas <- contextual::mvrnorm(n = 1, mu, sigma)
25 |       action$choice <- -(betas[2] / (2*betas[3]))
26 |       if(action$choice > 1){
27 |         action$choice <- 1
28 |       } else if(action$choice < 0) {
29 |         action$choice <- 0
30 |       }
31 |       action
32 |     },
33 |     set_reward = function(t, context, action, reward) {
34 |       y <- reward$reward
35 |       x <- action$choice
36 |       x <- matrix(c(1,x,x^2), nrow = 1, ncol = 3, byrow = TRUE)
37 |       self$theta$J <- (x*y)/self$theta$err + self$theta$J
38 |       self$theta$P <- t(x)%*%x + self$theta$P
39 |       self$theta
40 |     }
41 |   )
42 | )
43 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/3_plotter.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(lobstr)
 3 | 
 4 | # Config -----------------------------------------------------------------------------------------------------
 5 | 
 6 | load_file_name          <- "Yahoo_T_2e+06_sparse_0.99.RData"
 7 | 
 8 | # Setup ------------------------------------------------------------------------------------------------------
 9 | 
10 | history  <- History$new()
11 | 
12 | # Take a look at the results ---------------------------------------------------------------------------------
13 | 
14 | history$load_data_table(load_file_name)
15 | 
16 | plot(history, regret = FALSE, rate = TRUE, type = "cumulative", legend_position = "bottomright", interval = 1000)
17 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/4_plotter.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | 
 4 | load_file_names         <- list("D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.RData",
 5 |                                 "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.7.RData",
 6 |                                 "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.8.RData",
 7 |                                 "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.9.RData",
 8 |                                 "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.95.RData",
 9 |                                 "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.99.RData")
10 | 
11 | history  <- History$new()
12 | 
13 | ctr_list        <- list()
14 | 
15 | for(i in seq_along(load_file_names)) {
16 |   history$load(load_file_names[[i]])
17 |   first_day_n     <- floor(history$meta$sim$max_t)
18 |   first_day_data  <- history$get_cumulative_result(as_list = FALSE, t = first_day_n)
19 |   ctr             <- first_day_data$cum_reward_rate
20 |   agents          <- first_day_data$agent
21 |   ctr_relative    <- ctr / ctr[match("Random",agents)]
22 |   ctr_relative    <- ctr_relative[!ctr_relative==1]
23 | 
24 |   ctr_list[[i]]   <- c(ctr_relative)
25 | }
26 | 
27 | agents_relative <- agents[!agents=="Random"]
28 | 
29 | all_ctr <- data.frame("100%" = ctr_list[[1]],   "30%" = ctr_list[[2]],
30 |                       "20%"  = ctr_list[[3]],   "10%" = ctr_list[[4]],
31 |                       "5%"   = ctr_list[[5]],   "1%"  = ctr_list[[6]],  check.names = FALSE)
32 | 
33 | omniscient <- 1.615
34 | par(mfrow = c(1, 1), mar = c(4, 4, 0.3, 0.1), cex=1.3)
35 | barplot(as.matrix(all_ctr),  xpd = FALSE, beside=TRUE, legend = FALSE,
36 |         ylab="ctr", las=1, xlab="data size", ylim = c(1,1.8))
37 | abline(h=omniscient, col="gray", lwd=1, lty=2)
38 | barplot(as.matrix(all_ctr),  xpd = FALSE,col=gray.colors(6), beside=TRUE,
39 |         legend = agents_relative, args.legend = list(x = 'topright'),
40 |         ylab="ctr", las=1, xlab="data size", ylim = c(1,1.8),add=TRUE)
41 | box(lwd=3)
42 | 
43 | message("Plot completed")
44 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/alternative_db_scripts/2_run_the_simulation_on_monetdb.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | source("../dev.R")
 3 | 
 4 | library(data.table)
 5 | library(DBI)
 6 | library(MonetDBLite)
 7 | library(here)
 8 | 
 9 | 
10 | setwd(here::here("demo", "replication_li_2010"))
11 | 
12 | source("yahoo_bandit.R")
13 | source("yahoo_policy_epsilon_greedy.R")
14 | 
15 | # Connect to DB ----------------------------------------------------------------------------------------------
16 | 
17 | # monetdb.sequential=T is the difference between monetdblite life and death
18 | 
19 | options(monetdb.sequential=T)
20 | 
21 | db_dir <- "C:/YahooDb/yahoo.monetdblite"
22 | 
23 | con    <- dbConnect(MonetDBLite::MonetDBLite(), db_dir)
24 | 
25 | print(paste0("MonetDBLite: connection to '",dbListTables(con),"' database succesful!"))
26 | 
27 | # Config -----------------------------------------------------------------------------------------------------
28 | 
29 | simulations <- 1
30 | horizon     <- 10000
31 | 
32 | counted_rows <- as.integer(DBI::dbGetQuery(con, "SELECT COUNT(*) FROM yahoo" ))
33 | max_t        <- as.integer(DBI::dbGetQuery(con, "SELECT max(t) FROM yahoo" ))
34 | 
35 | print(counted_rows == max_t)
36 | 
37 | # Get arm/article lookup
38 | 
39 | arms_articles <- as.matrix(DBI::dbGetQuery(con, "SELECT DISTINCT article_id FROM yahoo"))
40 | class(arms_articles) <- "integer"
41 | 
42 | # Initiate YahooBandit ---------------------------------------------------------------------------------------
43 | 
44 | bandit      <- YahooBandit$new(con, k = 217L, d = 36L, arm_lookup = arms_articles)
45 | 
46 | agents <-
47 |   list(
48 |     Agent$new(YahooEpsilonGreedyPolicy$new(0.01), bandit, name = "EGreedy")
49 |   )
50 | 
51 | # Define the simulation --------------------------------------------------------------------------------------
52 | 
53 | simulation <-
54 |   Simulator$new(
55 |     agents,
56 |     simulations = simulations,
57 |     horizon = horizon,
58 |     do_parallel = FALSE,
59 |     write_progress_file = TRUE,
60 |     include_packages = c("DBI","MonetDBLite")
61 |   )
62 | 
63 | # Run the simulation
64 | 
65 | sim  <- simulation$run()
66 | 
67 | # Take a look at the results ---------------------------------------------------------------------------------
68 | 
69 | print(sim$meta$sim_total_duration)
70 | 
71 | plot(sim, regret = FALSE, rate = FALSE, type = "cumulative")
72 | 
73 | df <- sim$get_data_frame()
74 | 
75 | dbDisconnect(con, shutdown = TRUE)
76 | 
77 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/alternative_db_scripts/2_run_the_simulation_on_monetdb_lite.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | library(data.table)
 4 | library(DBI)
 5 | library(MonetDBLite)
 6 | library(here)
 7 | 
 8 | setwd(here::here("demo", "replication_li_2010"))
 9 | 
10 | source("yahoo_bandit.R")
11 | source("yahoo_policy_epsilon_greedy.R")
12 | 
13 | # Connect to DB ----------------------------------------------------------------------------------------------
14 | 
15 | # monetdb.sequential=T is the difference between monetdblite life and death
16 | 
17 | options(monetdb.sequential=T)
18 | 
19 | db_dir <- "C:/YahooDb/yahoo.monetdblite"
20 | 
21 | con    <- dbConnect(MonetDBLite::MonetDBLite(), db_dir)
22 | 
23 | print(paste0("MonetDBLite: connection to '",dbListTables(con),"' database succesful!"))
24 | 
25 | # Config -----------------------------------------------------------------------------------------------------
26 | 
27 | simulations <- 1
28 | horizon     <- 10000
29 | 
30 | counted_rows <- as.integer(DBI::dbGetQuery(con, "SELECT COUNT(*) FROM yahoo" ))
31 | max_t        <- as.integer(DBI::dbGetQuery(con, "SELECT max(t) FROM yahoo" ))
32 | 
33 | print(counted_rows == max_t)
34 | 
35 | # Get arm/article lookup
36 | 
37 | arms_articles <- as.matrix(DBI::dbGetQuery(con, "SELECT DISTINCT article_id FROM yahoo"))
38 | class(arms_articles) <- "integer"
39 | 
40 | # Initiate YahooBandit ---------------------------------------------------------------------------------------
41 | 
42 | bandit      <- YahooBandit$new(con, k = 217L, d = 36L, arm_lookup = arms_articles)
43 | 
44 | agents <-
45 |   list(
46 |     Agent$new(YahooEpsilonGreedyPolicy$new(0.01), bandit, name = "EGreedy")
47 |   )
48 | 
49 | # Define the simulation --------------------------------------------------------------------------------------
50 | 
51 | simulation <-
52 |   Simulator$new(
53 |     agents,
54 |     simulations = simulations,
55 |     horizon = horizon,
56 |     do_parallel = FALSE,
57 |     write_progress_file = TRUE,
58 |     include_packages = c("DBI","MonetDBLite")
59 |   )
60 | 
61 | # Run the simulation
62 | 
63 | sim  <- simulation$run()
64 | 
65 | # Take a look at the results ---------------------------------------------------------------------------------
66 | 
67 | print(sim$meta$sim_total_duration)
68 | 
69 | plot(sim, regret = FALSE, rate = FALSE, type = "cumulative")
70 | 
71 | df <- sim$get_data_frame()
72 | 
73 | dbDisconnect(con, shutdown = TRUE)
74 | 
75 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_epsilon_greedy.R:
--------------------------------------------------------------------------------
 1 | YahooEpsilonGreedyPolicy          <- R6::R6Class(
 2 |   portable = FALSE,
 3 |   class = FALSE,
 4 |   inherit = Policy,
 5 |   public = list(
 6 |     epsilon = NULL,
 7 |     class_name = "YahooEpsilonGreedyPolicy",
 8 |     initialize = function(epsilon = 0.1) {
 9 |       super$initialize()
10 |       self$epsilon                <- epsilon
11 |     },
12 |     set_parameters = function(context_params) {
13 |       self$theta_to_arms          <- list('n' = 0, 'mean' = 0)
14 |     },
15 |     get_action = function(t, context) {
16 |       if (runif(1) > self$epsilon) {
17 |         max_index                 <- context$arms[which_max_list(self$theta$mean[context$arms])]
18 |         self$action$choice        <- max_index
19 |       } else {
20 |         self$action$choice        <- sample(context$arms, 1)
21 |       }
22 |       self$action
23 |     },
24 |     set_reward = function(t, context, action, reward) {
25 | 
26 |       arm                         <- action$choice
27 |       reward                      <- reward$reward
28 | 
29 |       self$theta$n[[arm]]         <- self$theta$n[[arm]] + 1
30 |       self$theta$mean[[arm]]      <- self$theta$mean[[arm]] + (reward - self$theta$mean[[arm]]) / self$theta$n[[arm]]
31 | 
32 |       self$theta
33 |     }
34 |   )
35 | )
36 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_epsilon_greedy_seg.R:
--------------------------------------------------------------------------------
 1 | YahooEpsilonGreedySegPolicy          <- R6::R6Class(
 2 |   portable = FALSE,
 3 |   class = FALSE,
 4 |   inherit = Policy,
 5 |   public = list(
 6 |     epsilon = NULL,
 7 |     cluster = NULL,
 8 |     class_name = "YahooEpsilonGreedySegPolicy",
 9 |     initialize = function(epsilon = 0.1) {
10 |       super$initialize()
11 |       self$epsilon                <- epsilon
12 |     },
13 |     set_parameters = function(context_params) {
14 |       self$theta_to_arms          <- list('n' = rep(0,5), 'mean' = rep(0,5))
15 |     },
16 |     get_action = function(t, context) {
17 |       local_arms                  <- context$arms
18 |       if (runif(1) > self$epsilon) {
19 |         # find the feature on which a user scores highest - that is this user's cluster
20 |         self$cluster              <- which.max(head(context$X[context$unique,1],-1))
21 |         expected_rewards          <- rep(0.0, length(local_arms))
22 |         for (arm in seq_along(local_arms)) {
23 |           expected_rewards[arm]   <- self$theta$mean[[local_arms[arm]]][self$cluster]
24 |         }
25 |         action$choice             <- local_arms[which_max_tied(expected_rewards)]
26 |       } else {
27 |         action$choice             <- sample(local_arms, 1)
28 |       }
29 |       action
30 |     },
31 |     set_reward = function(t, context, action, reward) {
32 |       arm                                       <- action$choice
33 |       reward                                    <- reward$reward
34 |       self$theta$n[[arm]][self$cluster]         <- self$theta$n[[arm]][self$cluster] + 1
35 |       self$theta$mean[[arm]][self$cluster]      <- self$theta$mean[[arm]][self$cluster] +
36 |                                                    (reward - self$theta$mean[[arm]][self$cluster]) /
37 |                                                    self$theta$n[[arm]][self$cluster]
38 |       self$theta
39 |     }
40 |   )
41 | )
42 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_linucb_disjoint.R:
--------------------------------------------------------------------------------
 1 | YahooLinUCBDisjointPolicy <- R6::R6Class(
 2 |   portable = FALSE,
 3 |   class = FALSE,
 4 |   inherit = Policy,
 5 |   public = list(
 6 |     alpha = NULL,
 7 |     class_name = "YahooLinUCBDisjointPolicy",
 8 |     initialize = function(alpha = 0.2) {
 9 |       super$initialize()
10 |       self$alpha  <- alpha
11 |     },
12 |     set_parameters = function(context_params) {
13 |       ul <- length(context_params$unique)
14 | 
15 |       self$theta_to_arms <- list( 'A' = diag(1,ul,ul), 'b' = rep(0,ul),
16 |                                   'A_inv' = solve(diag(1,ul,ul)))
17 |     },
18 |     get_action = function(t, context) {
19 | 
20 |       expected_rewards <- rep(0.0, length(context$arms))
21 |       local_arms       <- context$arms
22 |       for (arm in seq_along(local_arms)) {
23 | 
24 |         x            <- context$X[context$unique,arm]
25 |         A            <- self$theta$A[[local_arms[arm]]]
26 |         A_inv        <- self$theta$A_inv[[local_arms[arm]]]
27 |         b            <- self$theta$b[[local_arms[arm]]]
28 |         theta_hat    <- A_inv %*% b
29 |         mean         <- x %*% theta_hat
30 |         sd           <- sqrt(tcrossprod(x %*% A_inv, x))
31 |         expected_rewards[arm] <- mean + self$alpha * sd
32 |       }
33 |       action$choice  <- context$arms[which_max_tied(expected_rewards)]
34 | 
35 |       action
36 |     },
37 |     set_reward = function(t, context, action, reward) {
38 | 
39 |       arm                       <- action$choice
40 |       arm_index                 <- which(context$arms == arm)
41 |       reward                    <- reward$reward
42 |       x                         <- context$X[context$unique,arm_index]
43 |       A_inv                     <- self$theta$A_inv[[arm]]
44 |       self$theta$A_inv[[arm]]   <- sherman_morrisson(self$theta$A_inv[[arm]],x)
45 |       self$theta$A[[arm]]       <- self$theta$A[[arm]] + outer(x, x)
46 |       self$theta$b[[arm]]       <- self$theta$b[[arm]] + reward * x
47 | 
48 |       self$theta
49 |     }
50 |   )
51 | )
52 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_random.R:
--------------------------------------------------------------------------------
 1 | YahooRandomPolicy <- R6::R6Class(
 2 |   portable = FALSE,
 3 |   class = FALSE,
 4 |   inherit = Policy,
 5 |   public = list(
 6 |     class_name = "YahooRandomPolicy",
 7 |     initialize = function() {
 8 |       super$initialize()
 9 |     },
10 |     set_parameters = function(context_params) {
11 |       self$theta_to_arms          <- list('n' = 0, 'mean' = 0)
12 |     },
13 |     get_action = function(t, context) {
14 |       action$choice               <- sample(context$arms, 1)
15 |       action
16 |     },
17 |     set_reward = function(t, context, action, reward) {
18 |       arm                         <- action$choice
19 |       reward                      <- reward$reward
20 |       inc(self$theta$n[[arm]])    <- 1
21 |       inc(self$theta$mean[[arm]]) <- (reward - self$theta$mean[[arm]]) / self$theta$n[[arm]]
22 |       self$theta
23 |     }
24 |   )
25 | )
26 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_ucb1_alpha.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | YahooUCB1AlphaPolicy <- R6::R6Class(
 3 |   portable = FALSE,
 4 |   class = FALSE,
 5 |   inherit = Policy,
 6 |   public = list(
 7 |     alpha = NULL,
 8 |     class_name = "YahooUCB1AlphaPolicy",
 9 |     initialize = function(alpha) {
10 |       super$initialize()
11 |       self$alpha                  <- alpha
12 |     },
13 |     set_parameters = function(context_params) {
14 |       self$theta_to_arms <- list('n' = 0, 'mean' = 0)
15 |     },
16 |     get_action = function(t, context) {
17 |       local_arms       <- context$arms
18 |       n_zero_arms      <- which(self$theta$n[local_arms] == 0)
19 |       if (length(n_zero_arms) > 0) {
20 |         action$choice <- local_arms[sample_one_of(n_zero_arms)]
21 |         return(action)
22 |       }
23 |       expected_rewards <- rep(0.0, length(context$arms))
24 |       for (arm in seq_along(local_arms)) {
25 |         # usb1 variance as in Li 2010 paper
26 |         variance <- self$alpha / sqrt( self$theta$n[[local_arms[arm]]] )
27 |         expected_rewards[arm] <- self$theta$mean[[local_arms[arm]]] + variance
28 |       }
29 |       action$choice <- local_arms[which_max_tied(expected_rewards)]
30 |       action
31 |     },
32 |     set_reward = function(t, context, action, reward) {
33 | 
34 |       arm                         <- action$choice
35 |       reward                      <- reward$reward
36 |       self$theta$n[[arm]]         <- self$theta$n[[arm]] + 1
37 |       self$theta$mean[[arm]]      <- self$theta$mean[[arm]] + (reward - self$theta$mean[[arm]]) / self$theta$n[[arm]]
38 | 
39 |       self$theta
40 |     }
41 |   )
42 | )
43 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_ucb1_alpha_seg.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | YahooUCB1AlphaSegPolicy <- R6::R6Class(
 3 |   portable = FALSE,
 4 |   class = FALSE,
 5 |   inherit = Policy,
 6 |   public = list(
 7 |     alpha = NULL,
 8 |     cluster = NULL,
 9 |     class_name = "YahooUCB1AlphaSegPolicy",
10 |     initialize = function(alpha) {
11 |       super$initialize()
12 |       self$alpha                  <- alpha
13 |     },
14 |     set_parameters = function(context_params) {
15 |       self$theta_to_arms          <- list('n' = rep(0,5), 'mean' = rep(0,5))
16 |     },
17 |     get_action = function(t, context) {
18 |       # find the feature on which a user scores highest - that is this user's cluster
19 |       self$cluster                <- which.max(head(context$X[context$unique,1],-1))
20 |       local_arms                  <- context$arms
21 |       for (arm in seq_along(local_arms)) {
22 |         if(self$theta$n[[local_arms[arm]]][self$cluster] == 0) {
23 |           action$choice             <- local_arms[arm]
24 |           return(action)
25 |         }
26 |       }
27 |       expected_rewards            <- rep(0.0, length(local_arms))
28 |       for (arm in seq_along(local_arms)) {
29 |         variance                  <- self$alpha / sqrt( self$theta$n[[local_arms[arm]]][self$cluster] )
30 |         expected_rewards[arm]     <- self$theta$mean[[local_arms[arm]]][self$cluster] + variance
31 |       }
32 |       action$choice               <- local_arms[which_max_tied(expected_rewards)]
33 |       action
34 |     },
35 |     set_reward = function(t, context, action, reward) {
36 | 
37 |       arm                                       <- action$choice
38 |       reward                                    <- reward$reward
39 |       self$theta$n[[arm]][self$cluster]         <- self$theta$n[[arm]][self$cluster] + 1
40 |       self$theta$mean[[arm]][self$cluster]      <- self$theta$mean[[arm]][self$cluster] +
41 |                                                     (reward - self$theta$mean[[arm]][self$cluster]) /
42 |                                                     self$theta$n[[arm]][self$cluster]
43 |       self$theta
44 |     }
45 |   )
46 | )
47 | 


--------------------------------------------------------------------------------
/demo/replication_li_2010/demo_yahoo_exploration/plots.R:
--------------------------------------------------------------------------------
 1 | library(DBI)
 2 | library(MonetDBLite)
 3 | library(ggplot2)
 4 | 
 5 | con <- DBI::dbConnect(MonetDB.R(), host="monetdb_ip", dbname="yahoo", user="monetdb", password="monetdb")
 6 | print(paste0("MonetDB: connection to '",dbListTables(con),"' database succesful!"))
 7 | 
 8 | times <- dbGetQuery(con, "SELECT timestamped, COUNT(timestamped) FROM yahoo GROUP BY timestamped")
 9 | names(times) <- c('timestamped', 'count')
10 | times$timestamped <- as.POSIXct(times$timestamped, origin = "1970-01-01")
11 | 
12 | # Traffic ----------------------------------------------------------------------------------------------------
13 | 
14 | times <- dbGetQuery(con, "SELECT timestamped, COUNT(timestamped) FROM yahoo GROUP BY timestamped")
15 | names(times) <- c('timestamped', 'count')
16 | times$timestamped <- as.POSIXct(times$timestamped, origin = "1970-01-01")
17 | ggplot(times, aes(timestamped, count)) + geom_line() + ggtitle("Traffic")
18 | 
19 | # CTR over time ----------------------------------------------------------------------------------------------
20 | 
21 | ctr <-
22 |   dbGetQuery(con, "SELECT timestamped, AVG(click) FROM yahoo GROUP BY timestamped")
23 | names(ctr) <- c('timestamped', 'ctr')
24 | ctr$timestamped <- as.POSIXct(ctr$timestamped, origin = "1970-01-01")
25 | ggplot(ctr, aes(timestamped, ctr)) + geom_line() + ggtitle("CTR")
26 | 
27 | # clickthrough rates, no context, no cluster -----------------------------------------------------------------
28 | 
29 | ctrs <- dbGetQuery(con, 'SELECT article_id, AVG(click) as ctr  from yahoo GROUP BY article_id ORDER BY ctr')
30 | 
31 | barplot(ctrs$ctr, names.arg=ctrs$article_id, ylim=c(0,0.1))
32 | 
33 | # top 5
34 | 
35 | barplot(tail(ctrs$ctr,5), names.arg=tail(ctrs$article_id,5), ylim=c(0,0.1))
36 | 
37 | # worst 10
38 | 
39 | barplot(head(ctrs$ctr,5), names.arg=head(ctrs$article_id,5), ylim=c(0,0.1))
40 | 
41 | # disconnect from and then shutdown DB -----------------------------------------------------------------------
42 | 
43 | dbDisconnect(con, shutdown = TRUE)
44 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_2_3.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | bandit  <- ContextualBernoulliBandit$new(matrix(c(0.5, 0.2, 0.1), 1))
 4 | policy  <- EpsilonGreedyPolicy$new(0.1)
 5 | agent   <- Agent$new(policy,bandit)
 6 | sim     <- Simulator$new(agent, simulations = 10000, horizon = 100)
 7 | history <- sim$run()
 8 | 
 9 | summary(history)
10 | 
11 | par(mfrow = c(1, 1), mar = c(4, 4, 0.5, 1), cex=1.3)
12 | plot(history, type = "arms", no_par = TRUE)
13 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_3_2_1.R:
--------------------------------------------------------------------------------
 1 | # Load and attach the contextual package.
 2 | library(contextual)
 3 | # Define for how long the simulation will run.
 4 | horizon <- 400
 5 | # Define how many times to repeat the simulation.
 6 | simulations <- 10000
 7 | # Define the probability that each ad will be clicked.
 8 | click_probabilities <- matrix(c(0.6, 0.4, 0.2), nrow = 1, ncol = 3, byrow = TRUE)
 9 | # Initialize a SyntheticBandit, which takes probabilites per arm for an argument.
10 | bandit <- ContextualBernoulliBandit$new(weights = click_probabilities)
11 | # Initialize EpsilonGreedyPolicy with a 40% exploiration rate.
12 | eg_policy <- EpsilonGreedyPolicy$new(epsilon = 0.4)
13 | # Initialize EpsilonFirstPolicy with a .25 x 400 = 100 step exploration period.
14 | ef_policy <- EpsilonFirstPolicy$new(epsilon = 0.25, N = horizon)
15 | # Initialize two Agents, binding each policy to a bandit.
16 | ef_agent <- Agent$new(ef_policy, bandit)
17 | eg_agent <- Agent$new(eg_policy, bandit)
18 | # Assign both agents to a list.
19 | agents <- list(ef_agent, eg_agent)
20 | # Initialize a Simulator with the agent list, horizon, and number of simulations.
21 | simulator <- Simulator$new(agents, horizon, simulations, do_parallel = TRUE)
22 | # Now run the simulator.
23 | history <- simulator$run()
24 | # Finally, plot the average reward per time step t
25 | par(mfrow = c(1, 2), mar = c(2,4,1,1), cex=1.4)
26 | plot(history, type = "average", regret = FALSE, no_par = TRUE, legend_border = FALSE, legend_position = "bottomright")
27 | # And the cumulative reward rate, which equals the Click Through Rate)
28 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE,  no_par = TRUE, legend_border = FALSE, legend_position = "bottomright")
29 | par(mfrow = c(1, 1))
30 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_3_2_2.R:
--------------------------------------------------------------------------------
 1 | # Load and attach the contextual package.
 2 | library(contextual)
 3 | # Define for how long the simulation will run.
 4 | horizon <- 400
 5 | # Define how many times to repeat the simulation.
 6 | simulations <- 10000
 7 | # Define the probability that each ad will be clicked.
 8 | click_probabilities <- matrix(c(0.6, 0.4, 0.2), nrow = 1, ncol = 3, byrow = TRUE)
 9 | # Initialize a SyntheticBandit, which takes probabilites per arm for an argument.
10 | bandit <- ContextualBernoulliBandit$new(weights = click_probabilities)
11 | # Initialize EpsilonGreedyPolicy with a 40% exploiration rate.
12 | eg_policy <- EpsilonGreedyPolicy$new(epsilon = 0.4)
13 | # Initialize EpsilonFirstPolicy with a .25 x 400 = 100 step exploration period.
14 | ef_policy <- EpsilonFirstPolicy$new(epsilon = 0.25, N = horizon)
15 | # Initialize two Agents, binding each policy to a bandit.
16 | ef_agent <- Agent$new(ef_policy, bandit)
17 | eg_agent <- Agent$new(eg_policy, bandit)
18 | # Assign both agents to a list.
19 | 
20 | ##################################################################################################
21 | #                        +-----+----+----------->  arms:  three ads
22 | #                        |     |    |
23 | click_probs <- matrix(c(0.5,  0.7, 0.1,  # -> context 1: older (p=.5)
24 |                         0.7,  0.1, 0.3), # -> context 2: young (p=.5)
25 | 
26 |                       nrow = 2, ncol = 3, byrow = TRUE)
27 | 
28 | # Initialize a SyntheticBandit with contextual weights
29 | context_bandit <- ContextualBernoulliBandit$new(weights = click_probs)
30 | # Initialize LinUCBDisjointPolicy
31 | lucb_policy    <- LinUCBDisjointPolicy$new(0.6)
32 | # Initialize three Agents, binding each policy to a bandit.
33 | ef_agent       <- Agent$new(ef_policy,   context_bandit)
34 | eg_agent       <- Agent$new(eg_policy,   context_bandit)
35 | lucb_agent     <- Agent$new(lucb_policy, context_bandit)
36 | # Assign all agents to a list.
37 | agents <- list(ef_agent, eg_agent, lucb_agent)
38 | # Initialize a Simulator with the agent list, horizon, and nr of simulations
39 | simulator <- Simulator$new(agents, horizon, simulations)
40 | # Now run the simulator.
41 | history <- simulator$run()
42 | 
43 | par(mfrow = c(1, 2), mar = c(2,4,1,1) , cex=1.4)
44 | # Finally, plot the average reward per time step t
45 | plot(history, type = "average", regret = FALSE, no_par = TRUE, legend_border = FALSE, legend_position = "bottomright")
46 | # And the cumulative reward rate, which equals the Click Through Rate)
47 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, no_par = TRUE, legend_border = FALSE, legend_position = "bottomright")
48 | par(mfrow = c(1, 1))
49 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_4_2_plot.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | bandit <- ContextualBernoulliBandit$new(weights = matrix(c(0.7, 0.2, 0.2),1,3))
 4 | 
 5 | agents <- list(Agent$new(RandomPolicy$new(), bandit),
 6 |                Agent$new(OraclePolicy$new(), bandit),
 7 |                Agent$new(ThompsonSamplingPolicy$new(1.0, 1.0), bandit),
 8 |                Agent$new(Exp3Policy$new(0.1), bandit),
 9 |                Agent$new(GittinsBrezziLaiPolicy$new(), bandit),
10 |                Agent$new(UCB1Policy$new(), bandit))
11 | 
12 | history <- Simulator$new(agents, horizon = 100, simulations = 300)$run()
13 | 
14 | par(mfrow = c(3, 2), mar = c(1, 4, 2, 1), cex=1.3)  #bottom, left, top, and right.
15 | plot(history, type = "cumulative", use_colors = FALSE, no_par = TRUE, legend_border = FALSE,
16 |      limit_agents = c("GittinsBrezziLai", "UCB1","ThompsonSampling"))
17 | 
18 | plot(history, type = "cumulative", regret = FALSE, legend = FALSE,
19 |      limit_agents = c("Exp3"), traces = TRUE, no_par = TRUE)
20 | 
21 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, disp = "sd",
22 |      limit_agents = c("Exp3", "ThompsonSampling"), legend_border = FALSE,
23 |      legend_position = "bottomright", no_par = TRUE)
24 | 
25 | plot(history, type = "cumulative", rate = TRUE, plot_only_disp = TRUE,
26 |      disp = "var", smooth = TRUE, limit_agents = c("Exp3", "ThompsonSampling"),
27 |      legend_border = FALSE, legend_position = "bottomright", no_par = TRUE)
28 | 
29 | plot(history, type = "average", disp = "ci", regret = FALSE, interval = 10,
30 |      smooth = TRUE, legend_position = "bottomright", no_par = TRUE, legend = FALSE)
31 | 
32 | plot(history, limit_agents = c("ThompsonSampling"), type = "arms",
33 |      interval = 20, no_par = TRUE)
34 | 
35 | par(mfrow = c(1, 1))
36 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_5_2.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | horizon            <- 100
 4 | simulations        <- 1000
 5 | weights            <- matrix(c(0.6, 0.2, 0.2), 1, 3)
 6 | 
 7 | policy             <- EpsilonFirstPolicy$new(epsilon = 0.5, N = horizon)
 8 | bandit             <- ContextualBernoulliBandit$new(weights = weights)
 9 | 
10 | agent              <- Agent$new(policy,bandit)
11 | 
12 | simulator          <- Simulator$new(agents = agent,
13 |                                     horizon = horizon,
14 |                                     simulations = simulations)
15 | 
16 | history            <- simulator$run()
17 | 
18 | par(mfrow = c(1, 2), mar = c(2, 4, 1, 1), cex=1.4)  #bottom, left, top, and right.
19 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE, legend_position = "bottomright")
20 | plot(history, type = "arms", no_par = TRUE)
21 | par(mfrow = c(1, 1))
22 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_5_3.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | horizon            <- 100
 4 | simulations        <- 1000
 5 | weights            <- matrix(c(0.8, 0.2, 0.2), 1, 3)
 6 | 
 7 | policy             <- EpsilonGreedyPolicy$new(epsilon = 0.1)
 8 | bandit             <- ContextualBernoulliBandit$new(weights = weights)
 9 | 
10 | agent              <- Agent$new(policy,bandit)
11 | 
12 | simulator          <- Simulator$new(agents = agent,
13 |                                     horizon = horizon,
14 |                                     simulations = simulations)
15 | 
16 | history            <- simulator$run()
17 | 
18 | par(mfrow = c(1, 2), mar = c(2, 4, 1, 1), cex=1.4)  #bottom, left, top, and right.
19 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE)
20 | plot(history, type = "arms", no_par = TRUE)
21 | par(mfrow = c(1, 1))
22 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_5_4.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | horizon     <- 100L
 4 | simulations <- 1000L
 5 | 
 6 |                       # k=1  k=2  k=3             -> columns represent arms
 7 | weights     <- matrix(c(0.6, 0.2, 0.2,     # d=1  -> rows represent
 8 |                         0.2, 0.6, 0.2,     # d=2     context features,
 9 |                         0.2, 0.2, 0.6),    # d=3
10 | 
11 |                       nrow = 3, ncol = 3, byrow = TRUE)
12 | 
13 | bandit      <- ContextualBernoulliBandit$new(weights = weights)
14 | 
15 | eg_policy   <- EpsilonGreedyPolicy$new(0.1)
16 | lucb_policy <- LinUCBDisjointPolicy$new(0.6)
17 | 
18 | agents      <- list(Agent$new(eg_policy, bandit, "EGreedy"),
19 |                     Agent$new(lucb_policy, bandit, "LinUCB"))
20 | 
21 | simulation  <- Simulator$new(agents, horizon, simulations, save_context = TRUE)
22 | history     <- simulation$run()
23 | 
24 | par(mfrow = c(2, 3), mar = c(2, 4, 1, 0.1), cex=1.3)  #bottom, left, top, and right.
25 | 
26 | plot(history, type = "cumulative", legend_border = FALSE, no_par = TRUE )
27 | plot(history, type = "arms",  limit_agents = c("LinUCB"), no_par = TRUE)
28 | plot(history, type = "arms",  limit_agents = c("EGreedy"), no_par = TRUE)
29 | 
30 | plot(history, type = "arms",  limit_agents = c("LinUCB"), limit_context = c("X.1"), no_par = TRUE)
31 | plot(history, type = "arms",  limit_agents = c("LinUCB"), limit_context = c("X.2"), no_par = TRUE )
32 | plot(history, type = "arms",  limit_agents = c("LinUCB"), limit_context = c("X.3"), no_par = TRUE )
33 | 
34 | par(mfrow = c(1, 1))
35 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_6.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | 
 3 | BasicPoissonBandit <- R6::R6Class(
 4 |   inherit = BasicBernoulliBandit,
 5 |   class = FALSE,
 6 |   public = list(
 7 |     weights = NULL,
 8 |     class_name = "BasicPoissonBandit",
 9 |     # Override get_reward & generate Poisson based rewards
10 |     get_reward = function(t, context, action) {
11 |       reward_means = rep(2,self$k)
12 |       rpm <- rpois(self$k, reward_means)
13 |       rewards <- matrix(rpm < self$weights, self$k, 1)*1
14 |       optimal_arm    <- which_max_tied(self$weights)
15 |       reward         <- list(
16 |         reward                   = rewards[action$choice],
17 |         optimal_arm              = optimal_arm,
18 |         optimal_reward           = rewards[optimal_arm]
19 |       )
20 |     }
21 |   )
22 | )
23 | 
24 | EpsilonGreedyAnnealingPolicy <- R6::R6Class(
25 |   # Class extends EpsilonGreedyPolicy
26 |   inherit = EpsilonGreedyPolicy,
27 |   portable = FALSE,
28 |   public = list(
29 |     class_name = "EpsilonGreedyAnnealingPolicy",
30 |     # Override EpsilonGreedyPolicy's get_action, use annealing epsilon
31 |     get_action = function(t, context) {
32 |       self$epsilon <- 1/(log(100*t+0.001))
33 |       super$get_action(t, context)
34 |     }
35 |   )
36 | )
37 | 
38 | weights <- c(7,1,2)
39 | horizon <- 200
40 | simulations <- 1000
41 | bandit <- BasicPoissonBandit$new(weights)
42 | ega_policy <- EpsilonGreedyAnnealingPolicy$new()
43 | eg_policy  <- EpsilonGreedyPolicy$new(0.2)
44 | agents <- list(Agent$new(ega_policy, bandit, "EG Annealing"),
45 |                Agent$new(eg_policy, bandit, "EG"))
46 | simulation <- Simulator$new(agents, horizon, simulations, do_parallel = FALSE)
47 | history <- simulation$run()
48 | 
49 | 
50 | par(mfrow = c(1, 3), mar = c(2, 4, 1, 0.1), cex=1.3)  #bottom, left, top, and right.
51 | 
52 | 
53 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE,
54 |               legend_position = "bottomright")
55 | plot(history, type = "arms",  limit_agents = c("EG"), no_par = TRUE,
56 |               interval = 25)
57 | plot(history, type = "arms",  limit_agents = c("EG Annealing"), no_par = TRUE,
58 |               interval = 25)
59 | 
60 | par(mfrow = c(1, 1))
61 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_7.R:
--------------------------------------------------------------------------------
 1 | library(contextual)
 2 | library(data.table)
 3 | 
 4 | # Define Replay Bandit
 5 | OfflineReplayEvaluatorBandit <- R6::R6Class(
 6 |   inherit = Bandit,
 7 |   private = list(
 8 |     S = NULL
 9 |   ),
10 |   public = list(
11 |     class_name = "OfflineReplayEvaluatorBandit",
12 |     initialize   = function(offline_data, k, d) {
13 |       self$k <- k                 # Number of arms
14 |       self$d <- d                 # Context feature vector dimensions
15 |       private$S <- offline_data   # Logged events
16 |     },
17 |     get_context = function(index) {
18 |       context <- list(
19 |         k = self$k,
20 |         d = self$d,
21 |         X = private$S$context[[index]]
22 |       )
23 |       context
24 |     },
25 |     get_reward = function(index, context, action) {
26 |       if (private$S$choice[[index]] == action$choice) {
27 |         list(
28 |           reward = as.double(private$S$reward[[index]])
29 |         )
30 |       } else {
31 |         NULL
32 |       }
33 |     }
34 |   )
35 | )
36 | 
37 | # Import personalization data-set
38 | url         <- "http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/dataset.txt"
39 | datafile    <- fread(url)
40 | 
41 | # Clean up datafile
42 | datafile[, context := as.list(as.data.frame(t(datafile[, 3:102])))]
43 | datafile[, (3:102) := NULL]
44 | datafile[, t := .I]
45 | datafile[, sim := 1]
46 | datafile[, agent := "linucb"]
47 | setnames(datafile, c("V1", "V2"), c("choice", "reward"))
48 | 
49 | # Set simulation parameters.
50 | simulations <- 1
51 | horizon     <- nrow(datafile)
52 | 
53 | # Initiate Replay bandit with 10 arms and 100 context dimensions
54 | log_S       <- datafile
55 | bandit      <- OfflineReplayEvaluatorBandit$new(log_S, k = 10, d = 100)
56 | 
57 | # Define agents.
58 | agents      <-
59 |   list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.01), bandit, "alpha = 0.01"),
60 |        Agent$new(LinUCBDisjointOptimizedPolicy$new(0.05), bandit, "alpha = 0.05"),
61 |        Agent$new(LinUCBDisjointOptimizedPolicy$new(0.1), bandit, "alpha = 0.1"),
62 |        Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit, "alpha = 1.0"))
63 | 
64 | # Initialize the simulation.
65 | simulation  <-
66 |   Simulator$new(
67 |     agents           = agents,
68 |     simulations      = simulations,
69 |     horizon          = horizon,
70 |     save_context     = TRUE
71 |   )
72 | 
73 | # Run the simulation.
74 | linucb_sim  <- simulation$run()
75 | 
76 | # plot the results
77 | par(mfrow = c(1, 1), mar = c(4, 4, 0.5, 1), cex=1.3)
78 | plot(linucb_sim, type = "cumulative", regret = FALSE, legend_title = "LinUCB",
79 |      rate = TRUE, legend_position = "bottomright")
80 | 


--------------------------------------------------------------------------------
/demo/replication_van_emden_2018/section_8.R:
--------------------------------------------------------------------------------
1 | # See the demo/replication_li_2010 directory.


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/all_cmab_phases_Part1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part1.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/all_cmab_phases_Part2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part2.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/all_cmab_phases_Part3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part3.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/all_cmab_phases_Part4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part4.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/all_cmab_phases_Part5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part5.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/all_cmab_phases_Part6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part6.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/all_cmab_phases_Part7.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part7.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/all_cmab_phases_Part8.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part8.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/cmab_chart.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/cmab_chart.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/contextual_class.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/contextual_class.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/contextual_sequence.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/contextual_sequence.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/offline_bandit.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/offline_bandit.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_2_3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_2_3.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_3_2_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_3_2_1.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_3_2_2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_3_2_2.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_4_2_plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_4_2_plot.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_5_2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_5_2.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_5_3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_5_3.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_5_4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_5_4.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_5_5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_5_5.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_8_bar.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_8_bar.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/fig/section_8_plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_8_plot.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/jss.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/jss.pdf


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/jss.synctex.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/jss.synctex.gz


--------------------------------------------------------------------------------
/docs/articles/arxiv_2018/jsslogo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/jsslogo.jpg


--------------------------------------------------------------------------------
/docs/articles/basic_epsilon_greedy.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/basic_epsilon_greedy.jpeg


--------------------------------------------------------------------------------
/docs/articles/basic_epsilon_greedy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/basic_epsilon_greedy.jpg


--------------------------------------------------------------------------------
/docs/articles/basic_epsilon_greedy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/basic_epsilon_greedy.png


--------------------------------------------------------------------------------
/docs/articles/carskit_depaul.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/carskit_depaul.jpeg


--------------------------------------------------------------------------------
/docs/articles/carskit_depaul.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/carskit_depaul.jpg


--------------------------------------------------------------------------------
/docs/articles/carskit_depaul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/carskit_depaul.png


--------------------------------------------------------------------------------
/docs/articles/cmabs.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/cmabs.jpeg


--------------------------------------------------------------------------------
/docs/articles/cmabs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/cmabs.jpg


--------------------------------------------------------------------------------
/docs/articles/cmabs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/cmabs.png


--------------------------------------------------------------------------------
/docs/articles/compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/compare.png


--------------------------------------------------------------------------------
/docs/articles/contextual-fig-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/contextual-fig-1.jpg


--------------------------------------------------------------------------------
/docs/articles/contextual-fig-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/contextual-fig-1.png


--------------------------------------------------------------------------------
/docs/articles/contextual-fig-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/contextual-fig-2.jpg


--------------------------------------------------------------------------------
/docs/articles/contextual-fig-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/contextual-fig-2.png


--------------------------------------------------------------------------------
/docs/articles/eckles_kaptein_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eckles_kaptein_1.jpg


--------------------------------------------------------------------------------
/docs/articles/eckles_kaptein_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eckles_kaptein_1.png


--------------------------------------------------------------------------------
/docs/articles/eg_average_reward.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_average_reward.jpeg


--------------------------------------------------------------------------------
/docs/articles/eg_average_reward.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_average_reward.jpg


--------------------------------------------------------------------------------
/docs/articles/eg_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_average_reward.png


--------------------------------------------------------------------------------
/docs/articles/eg_cumulative_reward.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_cumulative_reward.jpeg


--------------------------------------------------------------------------------
/docs/articles/eg_cumulative_reward.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_cumulative_reward.jpg


--------------------------------------------------------------------------------
/docs/articles/eg_cumulative_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_cumulative_reward.png


--------------------------------------------------------------------------------
/docs/articles/eg_incorrect.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_incorrect.jpeg


--------------------------------------------------------------------------------
/docs/articles/eg_incorrect.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_incorrect.jpg


--------------------------------------------------------------------------------
/docs/articles/eg_incorrect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_incorrect.png


--------------------------------------------------------------------------------
/docs/articles/eg_optimal_action.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_optimal_action.jpeg


--------------------------------------------------------------------------------
/docs/articles/eg_optimal_action.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_optimal_action.jpg


--------------------------------------------------------------------------------
/docs/articles/eg_optimal_action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_optimal_action.png


--------------------------------------------------------------------------------
/docs/articles/linucboffline.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/linucboffline.jpeg


--------------------------------------------------------------------------------
/docs/articles/linucboffline.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/linucboffline.jpg


--------------------------------------------------------------------------------
/docs/articles/linucboffline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/linucboffline.png


--------------------------------------------------------------------------------
/docs/articles/mabs.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/mabs.jpeg


--------------------------------------------------------------------------------
/docs/articles/mabs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/mabs.jpg


--------------------------------------------------------------------------------
/docs/articles/mabs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/mabs.png


--------------------------------------------------------------------------------
/docs/articles/ml10m.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ml10m.jpg


--------------------------------------------------------------------------------
/docs/articles/ml10m.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ml10m.png


--------------------------------------------------------------------------------
/docs/articles/replication-fig-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/replication-fig-1.jpg


--------------------------------------------------------------------------------
/docs/articles/replication-fig-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/replication-fig-1.png


--------------------------------------------------------------------------------
/docs/articles/replication-fig-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/replication-fig-2.jpg


--------------------------------------------------------------------------------
/docs/articles/replication-fig-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/replication-fig-2.png


--------------------------------------------------------------------------------
/docs/articles/softmax_average_reward.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_average_reward.jpeg


--------------------------------------------------------------------------------
/docs/articles/softmax_average_reward.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_average_reward.jpg


--------------------------------------------------------------------------------
/docs/articles/softmax_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_average_reward.png


--------------------------------------------------------------------------------
/docs/articles/softmax_cumulative_reward.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_cumulative_reward.jpeg


--------------------------------------------------------------------------------
/docs/articles/softmax_cumulative_reward.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_cumulative_reward.jpg


--------------------------------------------------------------------------------
/docs/articles/softmax_cumulative_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_cumulative_reward.png


--------------------------------------------------------------------------------
/docs/articles/softmax_optimal_action.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_optimal_action.jpeg


--------------------------------------------------------------------------------
/docs/articles/softmax_optimal_action.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_optimal_action.jpg


--------------------------------------------------------------------------------
/docs/articles/softmax_optimal_action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_optimal_action.png


--------------------------------------------------------------------------------
/docs/articles/sutton_eg_1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_1.jpeg


--------------------------------------------------------------------------------
/docs/articles/sutton_eg_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_1.jpg


--------------------------------------------------------------------------------
/docs/articles/sutton_eg_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_1.png


--------------------------------------------------------------------------------
/docs/articles/sutton_eg_2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_2.jpeg


--------------------------------------------------------------------------------
/docs/articles/sutton_eg_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_2.jpg


--------------------------------------------------------------------------------
/docs/articles/sutton_eg_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_2.png


--------------------------------------------------------------------------------
/docs/articles/sutton_gradient.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_gradient.jpeg


--------------------------------------------------------------------------------
/docs/articles/sutton_gradient.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_gradient.jpg


--------------------------------------------------------------------------------
/docs/articles/sutton_gradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_gradient.png


--------------------------------------------------------------------------------
/docs/articles/sutton_optimistic.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_optimistic.jpeg


--------------------------------------------------------------------------------
/docs/articles/sutton_optimistic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_optimistic.jpg


--------------------------------------------------------------------------------
/docs/articles/sutton_optimistic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_optimistic.png


--------------------------------------------------------------------------------
/docs/articles/sutton_ucb.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_ucb.jpeg


--------------------------------------------------------------------------------
/docs/articles/sutton_ucb.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_ucb.jpg


--------------------------------------------------------------------------------
/docs/articles/sutton_ucb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_ucb.png


--------------------------------------------------------------------------------
/docs/articles/sutton_violin.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_violin.jpeg


--------------------------------------------------------------------------------
/docs/articles/sutton_violin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_violin.jpg


--------------------------------------------------------------------------------
/docs/articles/sutton_violin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_violin.png


--------------------------------------------------------------------------------
/docs/articles/ucb_average_reward.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_average_reward.jpeg


--------------------------------------------------------------------------------
/docs/articles/ucb_average_reward.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_average_reward.jpg


--------------------------------------------------------------------------------
/docs/articles/ucb_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_average_reward.png


--------------------------------------------------------------------------------
/docs/articles/ucb_cumulative_reward.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_cumulative_reward.jpeg


--------------------------------------------------------------------------------
/docs/articles/ucb_cumulative_reward.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_cumulative_reward.jpg


--------------------------------------------------------------------------------
/docs/articles/ucb_cumulative_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_cumulative_reward.png


--------------------------------------------------------------------------------
/docs/articles/ucb_optimal_action.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_optimal_action.jpeg


--------------------------------------------------------------------------------
/docs/articles/ucb_optimal_action.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_optimal_action.jpg


--------------------------------------------------------------------------------
/docs/articles/ucb_optimal_action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_optimal_action.png


--------------------------------------------------------------------------------
/docs/bootstrap-toc.css:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
 3 |  * Copyright 2015 Aidan Feldman
 4 |  * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
 5 | 
 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */
 7 | 
 8 | /* All levels of nav */
 9 | nav[data-toggle='toc'] .nav > li > a {
10 |   display: block;
11 |   padding: 4px 20px;
12 |   font-size: 13px;
13 |   font-weight: 500;
14 |   color: #767676;
15 | }
16 | nav[data-toggle='toc'] .nav > li > a:hover,
17 | nav[data-toggle='toc'] .nav > li > a:focus {
18 |   padding-left: 19px;
19 |   color: #563d7c;
20 |   text-decoration: none;
21 |   background-color: transparent;
22 |   border-left: 1px solid #563d7c;
23 | }
24 | nav[data-toggle='toc'] .nav > .active > a,
25 | nav[data-toggle='toc'] .nav > .active:hover > a,
26 | nav[data-toggle='toc'] .nav > .active:focus > a {
27 |   padding-left: 18px;
28 |   font-weight: bold;
29 |   color: #563d7c;
30 |   background-color: transparent;
31 |   border-left: 2px solid #563d7c;
32 | }
33 | 
34 | /* Nav: second level (shown on .active) */
35 | nav[data-toggle='toc'] .nav .nav {
36 |   display: none; /* Hide by default, but at >768px, show it */
37 |   padding-bottom: 10px;
38 | }
39 | nav[data-toggle='toc'] .nav .nav > li > a {
40 |   padding-top: 1px;
41 |   padding-bottom: 1px;
42 |   padding-left: 30px;
43 |   font-size: 12px;
44 |   font-weight: normal;
45 | }
46 | nav[data-toggle='toc'] .nav .nav > li > a:hover,
47 | nav[data-toggle='toc'] .nav .nav > li > a:focus {
48 |   padding-left: 29px;
49 | }
50 | nav[data-toggle='toc'] .nav .nav > .active > a,
51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a,
52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a {
53 |   padding-left: 28px;
54 |   font-weight: 500;
55 | }
56 | 
57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */
58 | nav[data-toggle='toc'] .nav > .active > ul {
59 |   display: block;
60 | }
61 | 


--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
 1 | $(function() {
 2 | 
 3 |   // register a handler to move the focus to the search bar
 4 |   // upon pressing shift + "/" (i.e. "?")
 5 |   $(document).on('keydown', function(e) {
 6 |     if (e.shiftKey && e.keyCode == 191) {
 7 |       e.preventDefault();
 8 |       $("#search-input").focus();
 9 |     }
10 |   });
11 | 
12 |   $(document).ready(function() {
13 |     // do keyword highlighting
14 |     /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
15 |     var mark = function() {
16 | 
17 |       var referrer = document.URL ;
18 |       var paramKey = "q" ;
19 | 
20 |       if (referrer.indexOf("?") !== -1) {
21 |         var qs = referrer.substr(referrer.indexOf('?') + 1);
22 |         var qs_noanchor = qs.split('#')[0];
23 |         var qsa = qs_noanchor.split('&');
24 |         var keyword = "";
25 | 
26 |         for (var i = 0; i < qsa.length; i++) {
27 |           var currentParam = qsa[i].split('=');
28 | 
29 |           if (currentParam.length !== 2) {
30 |             continue;
31 |           }
32 | 
33 |           if (currentParam[0] == paramKey) {
34 |             keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
35 |           }
36 |         }
37 | 
38 |         if (keyword !== "") {
39 |           $(".contents").unmark({
40 |             done: function() {
41 |               $(".contents").mark(keyword);
42 |             }
43 |           });
44 |         }
45 |       }
46 |     };
47 | 
48 |     mark();
49 |   });
50 | });
51 | 
52 | /* Search term highlighting ------------------------------*/
53 | 
54 | function matchedWords(hit) {
55 |   var words = [];
56 | 
57 |   var hierarchy = hit._highlightResult.hierarchy;
58 |   // loop to fetch from lvl0, lvl1, etc.
59 |   for (var idx in hierarchy) {
60 |     words = words.concat(hierarchy[idx].matchedWords);
61 |   }
62 | 
63 |   var content = hit._highlightResult.content;
64 |   if (content) {
65 |     words = words.concat(content.matchedWords);
66 |   }
67 | 
68 |   // return unique words
69 |   var words_uniq = [...new Set(words)];
70 |   return words_uniq;
71 | }
72 | 
73 | function updateHitURL(hit) {
74 | 
75 |   var words = matchedWords(hit);
76 |   var url = "";
77 | 
78 |   if (hit.anchor) {
79 |     url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor;
80 |   } else {
81 |     url = hit.url + '?q=' + escape(words.join(" "));
82 |   }
83 | 
84 |   return url;
85 | }
86 | 


--------------------------------------------------------------------------------
/docs/docsearch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "index_name": "nth_iteration_labs_contextual",
 3 |   "start_urls": [
 4 |     {
 5 |       "url": "/index.html",
 6 |       "selectors_key": "homepage",
 7 |       "tags": [
 8 |         "homepage"
 9 |       ]
10 |     },
11 |     {
12 |       "url": "/reference",
13 |       "selectors_key": "reference",
14 |       "tags": [
15 |         "reference"
16 |       ]
17 |     },
18 |     {
19 |       "url": "/articles",
20 |       "selectors_key": "articles",
21 |       "tags": [
22 |         "articles"
23 |       ]
24 |     }
25 |   ],
26 |   "stop_urls": [
27 |     "/reference/$",
28 |     "/reference/index.html",
29 |     "/articles/$",
30 |     "/articles/index.html"
31 |   ],
32 |   "sitemap_urls": [
33 |     "/sitemap.xml"
34 |   ],
35 |   "selectors": {
36 |     "homepage": {
37 |       "lvl0": {
38 |         "selector": ".contents h1",
39 |         "default_value": "contextual Home page"
40 |       },
41 |       "lvl1": {
42 |         "selector": ".contents h2"
43 |       },
44 |       "lvl2": {
45 |         "selector": ".contents h3",
46 |         "default_value": "Context"
47 |       },
48 |       "lvl3": ".ref-arguments td, .ref-description",
49 |       "text": ".contents p, .contents li, .contents .pre"
50 |     },
51 |     "reference": {
52 |       "lvl0": {
53 |         "selector": ".contents h1"
54 |       },
55 |       "lvl1": {
56 |         "selector": ".contents .name",
57 |         "default_value": "Argument"
58 |       },
59 |       "lvl2": {
60 |         "selector": ".ref-arguments th",
61 |         "default_value": "Description"
62 |       },
63 |       "lvl3": ".ref-arguments td, .ref-description",
64 |       "text": ".contents p, .contents li"
65 |     },
66 |     "articles": {
67 |       "lvl0": {
68 |         "selector": ".contents h1"
69 |       },
70 |       "lvl1": {
71 |         "selector": ".contents .name"
72 |       },
73 |       "lvl2": {
74 |         "selector": ".contents h2, .contents h3",
75 |         "default_value": "Context"
76 |       },
77 |       "text": ".contents p, .contents li"
78 |     }
79 |   },
80 |   "selectors_exclude": [
81 |     ".dont-index"
82 |     ],
83 |   "min_indexed_level": 2,
84 |   "custom_settings": {
85 |     "separatorsToIndex": "_",
86 |     "attributesToRetrieve": [
87 |       "hierarchy",
88 |       "content",
89 |       "anchor",
90 |       "url",
91 |       "url_without_anchor"
92 |     ]
93 |   }
94 | }
95 | 
96 | 


--------------------------------------------------------------------------------
/docs/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/favicon.ico


--------------------------------------------------------------------------------
/docs/jquery.sticky-kit.min.js:
--------------------------------------------------------------------------------
 1 | /* Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 |  */
 2 | /*
 3 |   Source: https://github.com/leafo/sticky-kit
 4 |   License: MIT
 5 | */
 6 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k));
 7 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("<div />"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q,
 8 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),e<F&&(m=!1,c=q,null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),
 9 | h.detach()),b={position:"",width:"",top:""},a.css(b).removeClass(t).trigger("sticky_kit:unstick")),B&&(b=f.height(),u+q>b&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}),
10 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize",
11 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n<K;n++)d=this[n],J(b(d));return this}}).call(this);
12 | 


--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 4 | 	 viewBox="0 0 20 20" style="enable-background:new 0 0 20 20;" xml:space="preserve">
 5 | <style type="text/css">
 6 | 	.st0{fill:#75AADB;}
 7 | </style>
 8 | <path class="st0" d="M4,11.3h1.3v1.3H4c-2,0-4-2.3-4-4.7s2.1-4.7,4-4.7h5.3c1.9,0,4,2.3,4,4.7c0,1.9-1.2,3.6-2.7,4.3v-1.5
 9 | 	C11.4,10.2,12,9.1,12,8c0-1.7-1.4-3.3-2.7-3.3H4C2.7,4.7,1.3,6.3,1.3,8S2.7,11.3,4,11.3z M16,7.3h-1.3v1.3H16c1.3,0,2.7,1.6,2.7,3.3
10 | 	s-1.4,3.3-2.7,3.3h-5.3C9.4,15.3,8,13.7,8,12c0-1.1,0.6-2.2,1.3-2.8V7.7C7.9,8.4,6.7,10.1,6.7,12c0,2.4,2.1,4.7,4,4.7H16
11 | 	c1.9,0,4-2.3,4-4.7S18,7.3,16,7.3z"/>
12 | </svg>
13 | 


--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
 1 | pandoc: 2.7.3
 2 | pkgdown: 1.5.1
 3 | pkgdown_sha: ~
 4 | articles:
 5 |   cmabs: cmabs.html
 6 |   cmabsoffline: cmabsoffline.html
 7 |   eckles_kaptein: eckles_kaptein.html
 8 |   epsilongreedy: epsilongreedy.html
 9 |   introduction: introduction.html
10 |   mabs: mabs.html
11 |   ml10m: ml10m.html
12 |   offline_depaul_movies: offline_depaul_movies.html
13 |   faq: only_pkgdown/faq.html
14 |   replication: replication.html
15 |   simpsons: simpsons.html
16 |   sutton_barto: sutton_barto.html
17 |   website_optimization: website_optimization.html
18 | last_built: 2020-07-25T14:34Z
19 | 
20 | 


--------------------------------------------------------------------------------
/docs/reference/EpsilonFirstPolicy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/EpsilonFirstPolicy-1.png


--------------------------------------------------------------------------------
/docs/reference/EpsilonFirstPolicy-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/EpsilonFirstPolicy-2.png


--------------------------------------------------------------------------------
/docs/reference/EpsilonGreedyPolicy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/EpsilonGreedyPolicy-1.png


--------------------------------------------------------------------------------
/docs/reference/EpsilonGreedyPolicy-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/EpsilonGreedyPolicy-2.png


--------------------------------------------------------------------------------
/docs/reference/Exp3Policy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/Exp3Policy-1.png


--------------------------------------------------------------------------------
/docs/reference/Exp3Policy-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/Exp3Policy-2.png


--------------------------------------------------------------------------------
/docs/reference/GradientPolicy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/GradientPolicy-1.png


--------------------------------------------------------------------------------
/docs/reference/GradientPolicy-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/GradientPolicy-2.png


--------------------------------------------------------------------------------
/docs/reference/RandomPolicy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/RandomPolicy-1.png


--------------------------------------------------------------------------------
/docs/reference/SoftmaxPolicy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/SoftmaxPolicy-1.png


--------------------------------------------------------------------------------
/docs/reference/SoftmaxPolicy-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/SoftmaxPolicy-2.png


--------------------------------------------------------------------------------
/docs/reference/ThompsonSamplingPolicy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/ThompsonSamplingPolicy-1.png


--------------------------------------------------------------------------------
/docs/reference/UCB1Policy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/UCB1Policy-1.png


--------------------------------------------------------------------------------
/docs/reference/UCB1Policy-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/UCB1Policy-2.png


--------------------------------------------------------------------------------
/docs/reference/UCB2Policy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/UCB2Policy-1.png


--------------------------------------------------------------------------------
/docs/reference/UCB2Policy-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/UCB2Policy-2.png


--------------------------------------------------------------------------------
/docs/reference/figures/1simulator.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/1simulator.jpeg


--------------------------------------------------------------------------------
/docs/reference/figures/2agent.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/2agent.jpeg


--------------------------------------------------------------------------------
/docs/reference/figures/3abandit.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/3abandit.jpeg


--------------------------------------------------------------------------------
/docs/reference/figures/3bpolicy.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/3bpolicy.jpeg


--------------------------------------------------------------------------------
/docs/reference/figures/3cbandit.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/3cbandit.jpeg


--------------------------------------------------------------------------------
/docs/reference/figures/3dpolicy.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/3dpolicy.jpeg


--------------------------------------------------------------------------------
/docs/reference/figures/algoepsilonfirst.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/algoepsilonfirst.jpg


--------------------------------------------------------------------------------
/docs/reference/figures/cmab_all.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/cmab_all.jpeg


--------------------------------------------------------------------------------
/docs/reference/figures/cmab_all_large.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/cmab_all_large.jpg


--------------------------------------------------------------------------------
/docs/reference/figures/cmab_all_medium.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/cmab_all_medium.jpg


--------------------------------------------------------------------------------
/docs/reference/invgamma-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/invgamma-1.png


--------------------------------------------------------------------------------
/docs/reference/value_remaining-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/value_remaining-1.png


--------------------------------------------------------------------------------
/man/BootstrapTSPolicy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_mab_ts_bootstrap.R
 3 | \name{BootstrapTSPolicy}
 4 | \alias{BootstrapTSPolicy}
 5 | \title{Policy: Thompson sampling with the online bootstrap}
 6 | \description{
 7 | Bootstrap Thompson Sampling
 8 | }
 9 | \details{
10 | Bootstrap Thompson Sampling (BTS) is a heuristic method
11 | for solving bandit problems which modifies Thompson Sampling
12 | (see \link{ThompsonSamplingPolicy}) by replacing the posterior distribution
13 | used in Thompson sampling by a bootstrap distribution.
14 | }
15 | \section{Usage}{
16 | 
17 | \preformatted{
18 | policy <- BootstrapTSPolicy(J = 100, a= 1, b = 1)
19 | }
20 | 
21 | 
22 | \preformatted{
23 | policy <- BootstrapTSPolicy(1000)
24 | }
25 | }
26 | 
27 | \section{Arguments}{
28 | 
29 | 
30 | \describe{
31 | \item{\code{new(J = 100, a= 1, b = 1)}}{ Generates a new \code{BootstrapTSPolicy} object.
32 | Arguments are defined in the Argument section above.}
33 | }
34 | 
35 | \describe{
36 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of
37 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body.
38 | The parameters defined here can later be accessed by arm index in the following way:
39 | \code{theta[[index_of_arm]]$parameter_name}
40 | }
41 | }
42 | 
43 | \describe{
44 | \item{\code{get_action(context)}}{
45 | here, a policy decides which arm to choose, based on the current values
46 | of its parameters and, potentially, the current context.
47 | }
48 | }
49 | 
50 | \describe{
51 | \item{\code{set_reward(reward, context)}}{
52 | in \code{set_reward(reward, context)}, a policy updates its parameter values
53 | based on the reward received, and, potentially, the current context.
54 | }
55 | }
56 | }
57 | 
58 | \references{
59 | Eckles, D., & Kaptein, M. (2014). Thompson sampling with the online bootstrap.
60 | arXiv preprint arXiv:1410.4009.
61 | 
62 | Thompson, W. R. (1933). On the likelihood that one unknown probability exceeds another in
63 | view of the evidence of two samples. Biometrika, 25(3/4), 285-294.
64 | }
65 | \seealso{
66 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
67 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
68 | 
69 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
70 | \code{\link{OfflineReplayEvaluatorBandit}}
71 | 
72 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
73 | }
74 | 


--------------------------------------------------------------------------------
/man/ContextualEpochGreedyPolicy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_cmab_lin_epoch_greedy.R
 3 | \name{ContextualEpochGreedyPolicy}
 4 | \alias{ContextualEpochGreedyPolicy}
 5 | \title{Policy: A Time and Space Efficient Algorithm for Contextual Linear Bandits}
 6 | \description{
 7 | Policy: A Time and Space Efficient Algorithm for Contextual Linear Bandits
 8 | }
 9 | \section{Usage}{
10 | 
11 | \preformatted{
12 |  policy <- ContextualEpochGreedyPolicy$new(sZl = 10)
13 | }
14 | }
15 | 
16 | \seealso{
17 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
18 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
19 | 
20 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
21 | \code{\link{OfflineReplayEvaluatorBandit}}
22 | 
23 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/ContextualEpsilonGreedyPolicy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_cmab_lin_epsilon_greedy.R
 3 | \name{ContextualEpsilonGreedyPolicy}
 4 | \alias{ContextualEpsilonGreedyPolicy}
 5 | \title{Policy: ContextualEpsilonGreedyPolicy with unique linear models}
 6 | \description{
 7 | Policy: ContextualEpsilonGreedyPolicy with unique linear models
 8 | }
 9 | \section{Usage}{
10 | 
11 | \preformatted{
12 | policy <- ContextualEpsilonGreedyPolicy(epsilon = 0.1)
13 | }
14 | }
15 | 
16 | \section{Arguments}{
17 | 
18 | 
19 | \describe{
20 | \item{\code{epsilon}}{
21 | double, a positive real value R+
22 | }
23 | }
24 | }
25 | 
26 | \section{Parameters}{
27 | 
28 | 
29 | \describe{
30 | \item{\code{A}}{
31 | d*d identity matrix
32 | }
33 | \item{\code{b}}{
34 | a zero vector of length d
35 | }
36 | }
37 | }
38 | 
39 | \section{Methods}{
40 | 
41 | 
42 | \describe{
43 | \item{\code{new(epsilon = 0.1)}}{ Generates a new \code{ContextualEpsilonGreedyPolicy} object.
44 | Arguments are defined in the Argument section above.}
45 | }
46 | 
47 | \describe{
48 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of
49 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body.
50 | The parameters defined here can later be accessed by arm index in the following way:
51 | \code{theta[[index_of_arm]]$parameter_name}
52 | }
53 | }
54 | 
55 | \describe{
56 | \item{\code{get_action(context)}}{
57 | here, a policy decides which arm to choose, based on the current values
58 | of its parameters and, potentially, the current context.
59 | }
60 | }
61 | 
62 | \describe{
63 | \item{\code{set_reward(reward, context)}}{
64 | in \code{set_reward(reward, context)}, a policy updates its parameter values
65 | based on the reward received, and, potentially, the current context.
66 | }
67 | }
68 | }
69 | 
70 | \seealso{
71 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
72 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
73 | 
74 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
75 | \code{\link{OfflineReplayEvaluatorBandit}}
76 | 
77 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
78 | }
79 | 


--------------------------------------------------------------------------------
/man/ContextualLogitBTSPolicy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_cmab_logit_ts_bootstrap.R
 3 | \name{ContextualLogitBTSPolicy}
 4 | \alias{ContextualLogitBTSPolicy}
 5 | \title{Policy: ContextualLogitBTSPolicy}
 6 | \description{
 7 | Policy: ContextualLogitBTSPolicy
 8 | }
 9 | \section{Usage}{
10 | 
11 | \preformatted{
12 | policy <- ContextualLogitBTSPolicy()
13 | }
14 | }
15 | 
16 | \seealso{
17 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
18 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
19 | 
20 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
21 | \code{\link{OfflineReplayEvaluatorBandit}}
22 | 
23 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/ContextualTSProbitPolicy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_cmab_probit_ts.R
 3 | \name{ContextualTSProbitPolicy}
 4 | \alias{ContextualTSProbitPolicy}
 5 | \title{Policy: ContextualTSProbitPolicy}
 6 | \description{
 7 | Makes use of BOPR, ergo only use binary indepependent variables.
 8 | }
 9 | \section{Usage}{
10 | 
11 | \preformatted{
12 | policy <- ContextualTSProbitPolicy()
13 | }
14 | }
15 | 
16 | \seealso{
17 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
18 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
19 | 
20 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
21 | \code{\link{OfflineReplayEvaluatorBandit}}
22 | 
23 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/FixedPolicy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_fixed.R
 3 | \name{FixedPolicy}
 4 | \alias{FixedPolicy}
 5 | \title{Policy: Fixed Arm}
 6 | \description{
 7 | \code{FixedPolicy} implements a "naive" policy which always chooses a prespecified arm.
 8 | }
 9 | \section{Usage}{
10 | 
11 | \preformatted{
12 | policy <- FixedPolicy(fixed_arm = 1)
13 | }
14 | }
15 | 
16 | \section{Arguments}{
17 | 
18 | 
19 | \describe{
20 | \item{\code{fixed_arm}}{
21 | numeric; index of the arm that will be chosen for each time step.
22 | }
23 | }
24 | }
25 | 
26 | \section{Methods}{
27 | 
28 | 
29 | \describe{
30 | \item{\code{new()}}{ Generates a new \code{FixedPolicy} object. Arguments are defined in the Argument
31 | section above.}
32 | }
33 | 
34 | \describe{
35 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of
36 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body.
37 | The parameters defined here can later be accessed by arm index in the following way:
38 | \code{theta[[index_of_arm]]$parameter_name}
39 | }
40 | }
41 | 
42 | \describe{
43 | \item{\code{get_action(context)}}{
44 | here, a policy decides which arm to choose, based on the current values
45 | of its parameters and, potentially, the current context.
46 | }
47 | }
48 | 
49 | \describe{
50 | \item{\code{set_reward(reward, context)}}{
51 | in \code{set_reward(reward, context)}, a policy updates its parameter values
52 | based on the reward received, and, potentially, the current context.
53 | }
54 | }
55 | }
56 | 
57 | \seealso{
58 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
59 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
60 | 
61 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
62 | \code{\link{OfflineReplayEvaluatorBandit}}
63 | 
64 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
65 | }
66 | 


--------------------------------------------------------------------------------
/man/OraclePolicy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_oracle.R
 3 | \name{OraclePolicy}
 4 | \alias{OraclePolicy}
 5 | \title{Policy: Oracle}
 6 | \description{
 7 | \code{OraclePolicy} is a also known as a "cheating" or "godlike"
 8 | policy, as it knows the reward probabilities at all times,
 9 | and will always play the optimal arm. It is often used as
10 | a baseline to compare other policies to.
11 | }
12 | \section{Usage}{
13 | 
14 | \preformatted{
15 | policy <- OraclePolicy()
16 | }
17 | }
18 | 
19 | \section{Arguments}{
20 | 
21 | 
22 | \describe{
23 | \item{\code{name}}{
24 | character string specifying this policy. \code{name}
25 | is, among others, saved to the History log and displayed in summaries and plots.
26 | }
27 | }
28 | }
29 | 
30 | \section{Methods}{
31 | 
32 | 
33 | \describe{
34 | \item{\code{new()}}{ Generates a new \code{OraclePolicy} object. Arguments are defined in the Argument
35 | section above.}
36 | }
37 | 
38 | \describe{
39 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of
40 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body.
41 | The parameters defined here can later be accessed by arm index in the following way:
42 | \code{theta[[index_of_arm]]$parameter_name}
43 | }
44 | }
45 | 
46 | \describe{
47 | \item{\code{get_action(context)}}{
48 | here, a policy decides which arm to choose, based on the current values
49 | of its parameters and, potentially, the current context.
50 | }
51 | }
52 | 
53 | \describe{
54 | \item{\code{set_reward(reward, context)}}{
55 | in \code{set_reward(reward, context)}, a policy updates its parameter values
56 | based on the reward received, and, potentially, the current context.
57 | }
58 | }
59 | }
60 | 
61 | \references{
62 | Gittins, J., Glazebrook, K., & Weber, R. (2011). Multi-armed bandit allocation indices. John Wiley & Sons.
63 | (Original work published 1989)
64 | }
65 | \seealso{
66 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
67 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
68 | 
69 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
70 | \code{\link{OfflineReplayEvaluatorBandit}}
71 | 
72 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
73 | }
74 | 


--------------------------------------------------------------------------------
/man/RandomPolicy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_random.R
 3 | \name{RandomPolicy}
 4 | \alias{RandomPolicy}
 5 | \title{Policy: Random}
 6 | \description{
 7 | \code{RandomPolicy} always explores, choosing arms uniformly at random.
 8 | In that respect, \code{RandomPolicy} is the mirror image of a pure greedy policy,
 9 | which would always seek to exploit.
10 | }
11 | \section{Usage}{
12 | 
13 | \preformatted{
14 | policy <- RandomPolicy(name = "RandomPolicy")
15 | }
16 | }
17 | 
18 | \section{Arguments}{
19 | 
20 | 
21 | \describe{
22 | \item{\code{name}}{
23 | character string specifying this policy. \code{name}
24 | is, among others, saved to the History log and displayed in summaries and plots.
25 | }
26 | }
27 | }
28 | 
29 | \section{Methods}{
30 | 
31 | 
32 | \describe{
33 | \item{\code{new()}}{ Generates a new \code{RandomPolicy} object. Arguments are defined in the Argument
34 | section above.}
35 | }
36 | 
37 | \describe{
38 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of
39 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body.
40 | The parameters defined here can later be accessed by arm index in the following way:
41 | \code{theta[[index_of_arm]]$parameter_name}
42 | }
43 | }
44 | 
45 | \describe{
46 | \item{\code{get_action(context)}}{
47 | here, a policy decides which arm to choose, based on the current values
48 | of its parameters and, potentially, the current context.
49 | }
50 | }
51 | 
52 | \describe{
53 | \item{\code{set_reward(reward, context)}}{
54 | in \code{set_reward(reward, context)}, a policy updates its parameter values
55 | based on the reward received, and, potentially, the current context.
56 | }
57 | }
58 | }
59 | 
60 | \examples{
61 | 
62 | horizon            <- 100L
63 | simulations        <- 100L
64 | weights            <- c(0.9, 0.1, 0.1)
65 | 
66 | policy             <- RandomPolicy$new()
67 | bandit             <- BasicBernoulliBandit$new(weights = weights)
68 | agent              <- Agent$new(policy, bandit)
69 | 
70 | history            <- Simulator$new(agent, horizon, simulations, do_parallel = FALSE)$run()
71 | 
72 | plot(history, type = "arms")
73 | }
74 | \references{
75 | Gittins, J., Glazebrook, K., & Weber, R. (2011). Multi-armed bandit allocation indices. John Wiley & Sons.
76 | (Original work published 1989)
77 | }
78 | \seealso{
79 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
80 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
81 | 
82 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
83 | \code{\link{OfflineReplayEvaluatorBandit}}
84 | 
85 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
86 | }
87 | 


--------------------------------------------------------------------------------
/man/UCB1Policy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/policy_mab_ucb1.R
 3 | \name{UCB1Policy}
 4 | \alias{UCB1Policy}
 5 | \title{Policy: UCB1}
 6 | \description{
 7 | UCB policy for bounded bandits with a Chernoff-Hoeffding Bound
 8 | }
 9 | \details{
10 | \code{UCB1Policy} constructs an optimistic estimate in the form of an Upper Confidence Bound to
11 | create an estimate of the expected payoff of each action, and picks the action with the highest estimate.
12 | If the guess is wrong, the optimistic guess quickly decreases, till another action has
13 | the higher estimate.
14 | }
15 | \section{Usage}{
16 | 
17 | \preformatted{
18 | policy <- UCB1Policy()
19 | }
20 | }
21 | 
22 | \section{Methods}{
23 | 
24 | 
25 | \describe{
26 | \item{\code{new()}}{ Generates a new \code{UCB1Policy} object.}
27 | }
28 | 
29 | \describe{
30 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of
31 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body.
32 | The parameters defined here can later be accessed by arm index in the following way:
33 | \code{theta[[index_of_arm]]$parameter_name}
34 | }
35 | }
36 | 
37 | \describe{
38 | \item{\code{get_action(context)}}{
39 | here, a policy decides which arm to choose, based on the current values
40 | of its parameters and, potentially, the current context.
41 | }
42 | }
43 | 
44 | \describe{
45 | \item{\code{set_reward(reward, context)}}{
46 | in \code{set_reward(reward, context)}, a policy updates its parameter values
47 | based on the reward received, and, potentially, the current context.
48 | }
49 | }
50 | }
51 | 
52 | \examples{
53 | \dontrun{
54 | 
55 | horizon            <- 100L
56 | simulations        <- 100L
57 | weights            <- c(0.9, 0.1, 0.1)
58 | 
59 | policy             <- UCB1Policy$new()
60 | bandit             <- BasicBernoulliBandit$new(weights = weights)
61 | agent              <- Agent$new(policy, bandit)
62 | 
63 | history            <- Simulator$new(agent, horizon, simulations, do_parallel = FALSE)$run()
64 | 
65 | plot(history, type = "cumulative")
66 | 
67 | plot(history, type = "arms")
68 | 
69 | }
70 | }
71 | \references{
72 | Lai, T. L., & Robbins, H. (1985). Asymptotically efficient adaptive allocation rules. Advances in applied
73 | mathematics, 6(1), 4-22.
74 | }
75 | \seealso{
76 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
77 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
78 | 
79 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
80 | \code{\link{OfflineReplayEvaluatorBandit}}
81 | 
82 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
83 | }
84 | 


--------------------------------------------------------------------------------
/man/clipr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{clipr}
 4 | \alias{clipr}
 5 | \title{Clip vectors}
 6 | \usage{
 7 | clipr(x, min, max)
 8 | }
 9 | \arguments{
10 | \item{x}{to be clipped vector}
11 | 
12 | \item{min}{numeric. lowest value}
13 | 
14 | \item{max}{numeric. highest value}
15 | }
16 | \description{
17 | Clips values to a mininum and maximum value. That is, all values below the lower clamp
18 | value and the upper clamp value become the lower/upper value specified
19 | }
20 | 


--------------------------------------------------------------------------------
/man/data_table_factors_to_numeric.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{data_table_factors_to_numeric}
 4 | \alias{data_table_factors_to_numeric}
 5 | \title{Convert all factor columns in data.table to numeric}
 6 | \usage{
 7 | data_table_factors_to_numeric(dt)
 8 | }
 9 | \arguments{
10 | \item{dt}{a data.table}
11 | }
12 | \value{
13 | the data.table with column factors converted to numeric
14 | }
15 | \description{
16 | Convert all factor columns in data.table to numeric
17 | }
18 | 


--------------------------------------------------------------------------------
/man/dec-set.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{dec<-}
 4 | \alias{dec<-}
 5 | \title{Decrement}
 6 | \usage{
 7 | dec(x) <- value
 8 | }
 9 | \arguments{
10 | \item{x}{object to be decremented}
11 | 
12 | \item{value}{value by which x will be modified}
13 | }
14 | \description{
15 | \code{dec<-} decrements \code{x} by value. Equivalent to \code{x <- x - value.}
16 | }
17 | \examples{
18 | x <- 6:10
19 | dec(x) <- 5
20 | x
21 | 
22 | }
23 | 


--------------------------------------------------------------------------------
/man/figures/1simulator.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/1simulator.jpeg


--------------------------------------------------------------------------------
/man/figures/2agent.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/2agent.jpeg


--------------------------------------------------------------------------------
/man/figures/3abandit.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/3abandit.jpeg


--------------------------------------------------------------------------------
/man/figures/3bpolicy.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/3bpolicy.jpeg


--------------------------------------------------------------------------------
/man/figures/3cbandit.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/3cbandit.jpeg


--------------------------------------------------------------------------------
/man/figures/3dpolicy.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/3dpolicy.jpeg


--------------------------------------------------------------------------------
/man/figures/algoepsilonfirst.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/algoepsilonfirst.jpg


--------------------------------------------------------------------------------
/man/figures/cmab_all.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/cmab_all.jpeg


--------------------------------------------------------------------------------
/man/figures/cmab_all_large.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/cmab_all_large.jpg


--------------------------------------------------------------------------------
/man/figures/cmab_all_medium.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/cmab_all_medium.jpg


--------------------------------------------------------------------------------
/man/formatted_difftime.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{formatted_difftime}
 4 | \alias{formatted_difftime}
 5 | \title{Format difftime objects}
 6 | \usage{
 7 | formatted_difftime(x)
 8 | }
 9 | \arguments{
10 | \item{x}{difftime object}
11 | }
12 | \value{
13 | string "days, h:mm:ss.ms"
14 | }
15 | \description{
16 | Format difftime objects
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_arm_context.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{get_arm_context}
 4 | \alias{get_arm_context}
 5 | \title{Return context vector of an arm}
 6 | \usage{
 7 | get_arm_context(
 8 |   context,
 9 |   arm,
10 |   select_features = NULL,
11 |   prepend_arm_vector = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{context}{a context list containing a d x k Matrix or
16 | d dimensional context vector X, the number of features d and
17 | number of arms k.}
18 | 
19 | \item{arm}{index of arm.}
20 | 
21 | \item{select_features}{indices of to be returned features.}
22 | 
23 | \item{prepend_arm_vector}{prepend a one-hot-encoded arm vector to the returned context vector. That is,
24 | when k = 5 arms, and the to be returned arm vector is arm 3, prepend c(0,0,1,0,0)}
25 | }
26 | \value{
27 | Vector that represents context related to an arm
28 | }
29 | \description{
30 | Given d x k matrix or d dimensional vector X,
31 | returns a vector with arm's context.
32 | }
33 | 


--------------------------------------------------------------------------------
/man/get_full_context.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{get_full_context}
 4 | \alias{get_full_context}
 5 | \title{Get full context matrix over all arms}
 6 | \usage{
 7 | get_full_context(context, select_features = NULL, prepend_arm_matrix = FALSE)
 8 | }
 9 | \arguments{
10 | \item{context}{a context list containing a d x k Matrix or
11 | d dimensional context vector X, the number of features d and
12 | number of arms k.}
13 | 
14 | \item{select_features}{indices of to be returned feature rows.b}
15 | 
16 | \item{prepend_arm_matrix}{prepend a diagonal arm matrix to the returned context vector. That is,
17 | when k = 5 arms, prepend diag(5) to the top of the matrix.}
18 | }
19 | \value{
20 | A d x k context Matrix
21 | }
22 | \description{
23 | Given matrix or d dimensional vector X,
24 | number of arms k and number of features d
25 | returns a matrix with d x k context matrix
26 | }
27 | 


--------------------------------------------------------------------------------
/man/get_global_seed.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{get_global_seed}
 4 | \alias{get_global_seed}
 5 | \title{Lookup .Random.seed in global environment}
 6 | \usage{
 7 | get_global_seed()
 8 | }
 9 | \value{
10 | an integer vector, containing the random number generator (RNG) state for random number generation
11 | }
12 | \description{
13 | Lookup .Random.seed in global environment
14 | }
15 | 


--------------------------------------------------------------------------------
/man/inc-set.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{inc<-}
 4 | \alias{inc<-}
 5 | \title{Increment}
 6 | \usage{
 7 | inc(x) <- value
 8 | }
 9 | \arguments{
10 | \item{x}{object to be incremented}
11 | 
12 | \item{value}{value by which x will be modified}
13 | }
14 | \description{
15 | \code{inc<-} increments \code{x} by value. Equivalent to \code{x <- x + value.}
16 | }
17 | \examples{
18 | x <- 1:5
19 | inc(x) <- 5
20 | x
21 | 
22 | }
23 | 


--------------------------------------------------------------------------------
/man/ind.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{ind}
 4 | \alias{ind}
 5 | \title{On-the-fly indicator function for use in formulae}
 6 | \usage{
 7 | ind(cond)
 8 | }
 9 | \arguments{
10 | \item{cond}{a logical condition to be evaluated}
11 | }
12 | \value{
13 | a binary (0/1) coded variable indicating whether the condition is true
14 | }
15 | \description{
16 | On-the-fly indicator function for use in formulae
17 | }
18 | 


--------------------------------------------------------------------------------
/man/inv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{inv}
 4 | \alias{inv}
 5 | \title{Inverse from Choleski (or QR) Decomposition.}
 6 | \usage{
 7 | inv(M)
 8 | }
 9 | \arguments{
10 | \item{M}{matrix}
11 | }
12 | \description{
13 | Invert a symmetric, positive definite square matrix from its Choleski decomposition.
14 | }
15 | \examples{
16 | inv(cbind(1, 1:3, c(1,3,7)))
17 | 
18 | }
19 | 


--------------------------------------------------------------------------------
/man/invgamma.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{invgamma}
 4 | \alias{invgamma}
 5 | \alias{dinvgamma}
 6 | \alias{pinvgamma}
 7 | \alias{qinvgamma}
 8 | \alias{rinvgamma}
 9 | \title{The Inverse Gamma Distribution}
10 | \usage{
11 | dinvgamma(x, shape, rate = 1, scale = 1/rate, log = FALSE)
12 | 
13 | pinvgamma(q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, log.p = FALSE)
14 | 
15 | qinvgamma(p, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, log.p = FALSE)
16 | 
17 | rinvgamma(n, shape, rate = 1, scale = 1/rate)
18 | }
19 | \arguments{
20 | \item{x, q}{vector of quantiles.}
21 | 
22 | \item{shape}{inverse gamma shape parameter}
23 | 
24 | \item{rate}{inverse gamma rate parameter}
25 | 
26 | \item{scale}{alternative to rate; scale = 1/rate}
27 | 
28 | \item{log, log.p}{logical; if TRUE, probabilities p are given as
29 | log(p).}
30 | 
31 | \item{lower.tail}{logical; if TRUE (default), probabilities are P(X <= x) otherwise, P(X > x).}
32 | 
33 | \item{p}{vector of probabilities.}
34 | 
35 | \item{n}{number of observations. If length(n) > 1, the length is
36 | taken to be the number required.}
37 | }
38 | \description{
39 | Density, distribution function, quantile function and random
40 | generation for the inverse gamma distribution.
41 | }
42 | \details{
43 | The inverse gamma distribution with parameters shape and rate has
44 | density \emph{f(x) = rate^shape/Gamma(shape) x^(-1-shape)
45 | e^(-rate/x)} it is the inverse of the standard gamma
46 | parameterization in R.
47 | 
48 | The functions (d/p/q/r)invgamma simply wrap those of the standard
49 | (d/p/q/r)gamma R implementation, so look at, say,
50 | \code{\link{dgamma}} for details.
51 | }
52 | \examples{
53 | 
54 | s <- seq(0, 5, .01)
55 | plot(s, dinvgamma(s, 7, 10), type = 'l')
56 | 
57 | f <- function(x) dinvgamma(x, 7, 10)
58 | q <- 2
59 | integrate(f, 0, q)
60 | (p <- pinvgamma(q, 7, 10))
61 | qinvgamma(p, 7, 10) # = q
62 | mean(rinvgamma(1e5, 7, 10) <= q)
63 | }
64 | 


--------------------------------------------------------------------------------
/man/invlogit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{invlogit}
 4 | \alias{invlogit}
 5 | \title{Inverse Logit Function}
 6 | \usage{
 7 | invlogit(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A numeric object.}
11 | }
12 | \value{
13 | An object of the same type as x containing the inverse logits of the input values.
14 | }
15 | \description{
16 | Given a numeric object return the inverse logit of the values.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/is_rstudio.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{is_rstudio}
 4 | \alias{is_rstudio}
 5 | \title{Check if in RStudio}
 6 | \usage{
 7 | is_rstudio()
 8 | }
 9 | \value{
10 | A \code{logical} value that indicates whether R is open in RStudio.
11 | }
12 | \description{
13 | Detects whether R is open in RStudio.
14 | }
15 | \examples{
16 | is_rstudio()
17 | 
18 | }
19 | 


--------------------------------------------------------------------------------
/man/mvrnorm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{mvrnorm}
 4 | \alias{mvrnorm}
 5 | \title{Simulate from a Multivariate Normal Distribution}
 6 | \usage{
 7 | mvrnorm(n, mu, sigma)
 8 | }
 9 | \arguments{
10 | \item{n}{the number of samples required.}
11 | 
12 | \item{mu}{a vector giving the means of the variables.}
13 | 
14 | \item{sigma}{a positive-definite symmetric matrix specifying the covariance matrix of the variables.}
15 | }
16 | \value{
17 | If \code{n = 1} a vector of the same length as \code{mu}, otherwise an \code{n} by
18 | \code{length(mu)} matrix with one sample in each row.
19 | }
20 | \description{
21 | Produces one or more samples from the specified
22 | multivariate normal distribution.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/one_hot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{one_hot}
 4 | \alias{one_hot}
 5 | \title{One Hot Encoding of data.table columns}
 6 | \usage{
 7 | one_hot(
 8 |   dt,
 9 |   cols = "auto",
10 |   sparsifyNAs = FALSE,
11 |   naCols = FALSE,
12 |   dropCols = TRUE,
13 |   dropUnusedLevels = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{dt}{A data.table}
18 | 
19 | \item{cols}{Which column(s) should be one-hot-encoded? DEFAULT = "auto" encodes all unordered
20 | factor columns.}
21 | 
22 | \item{sparsifyNAs}{Should NAs be converted to 0s?}
23 | 
24 | \item{naCols}{Should columns be generated to indicate the present of NAs? Will only apply to factor
25 | columns with at least one NA}
26 | 
27 | \item{dropCols}{Should the resulting data.table exclude the original columns which are one-hot-encoded?}
28 | 
29 | \item{dropUnusedLevels}{Should columns of all 0s be generated for unused factor levels?}
30 | }
31 | \description{
32 | One-Hot-Encode unordered factor columns of a data.table mltools. From ben519's "mltools" package.
33 | }
34 | \details{
35 | One-hot-encoding converts an unordered categorical vector (i.e. a factor) to multiple binarized vectors
36 | where each binary vector of
37 | 1s and 0s indicates the presence of a class (i.e. level) of the of the original vector.
38 | }
39 | \examples{
40 | library(data.table)
41 | 
42 | dt <- data.table(
43 |   ID = 1:4,
44 |   color = factor(c("red", NA, "blue", "blue"), levels=c("blue", "green", "red"))
45 | )
46 | 
47 | one_hot(dt)
48 | one_hot(dt, sparsifyNAs=TRUE)
49 | one_hot(dt, naCols=TRUE)
50 | one_hot(dt, dropCols=FALSE)
51 | one_hot(dt, dropUnusedLevels=TRUE)
52 | 
53 | }
54 | 


--------------------------------------------------------------------------------
/man/ones_in_zeroes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{ones_in_zeroes}
 4 | \alias{ones_in_zeroes}
 5 | \title{A vector of zeroes and ones}
 6 | \usage{
 7 | ones_in_zeroes(vector_length, index_of_one)
 8 | }
 9 | \arguments{
10 | \item{vector_length}{How long will the vector be?}
11 | 
12 | \item{index_of_one}{Where to insert the one?}
13 | }
14 | \value{
15 | Vector of zeroes with one(s) at given index position(s)
16 | }
17 | \description{
18 | A vector of zeroes and ones
19 | }
20 | 


--------------------------------------------------------------------------------
/man/plot.history.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_generic.R
 3 | \name{plot.history}
 4 | \alias{plot.history}
 5 | \alias{plot.History}
 6 | \title{Plot Method for Contextual History}
 7 | \usage{
 8 | \method{plot}{History}(x, ...)
 9 | }
10 | \arguments{
11 | \item{x}{A \code{History} object.}
12 | 
13 | \item{...}{Further plotting parameters.}
14 | }
15 | \description{
16 | plot.history, a method for the plot generic. It is designed for a quick look at History data.
17 | }
18 | \seealso{
19 | Core contextual classes: \code{\link{Simulator}},
20 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
21 | 
22 | Bandit classes: \code{\link{Bandit}}, \code{\link{BasicBernoulliBandit}},
23 | \code{\link{OfflineReplayEvaluatorBandit}}, \code{\link{ContextualLogitBandit}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/print.history.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_generic.R
 3 | \name{print.history}
 4 | \alias{print.history}
 5 | \alias{print.History}
 6 | \title{Print Method for Contextual History}
 7 | \usage{
 8 | \method{print}{History}(x, ...)
 9 | }
10 | \arguments{
11 | \item{x}{A \code{History} object.}
12 | 
13 | \item{...}{Further plotting parameters.}
14 | }
15 | \description{
16 | print.history, a method for the print generic. It is designed for a quick look at History data.
17 | }
18 | \seealso{
19 | Core contextual classes: \code{\link{Simulator}},
20 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
21 | 
22 | Bandit classes: \code{\link{Bandit}}, \code{\link{BasicBernoulliBandit}},
23 | \code{\link{OfflineReplayEvaluatorBandit}}, \code{\link{ContextualLogitBandit}}
24 | }
25 | 


--------------------------------------------------------------------------------
/man/prob_winner.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{prob_winner}
 4 | \alias{prob_winner}
 5 | \title{Binomial Win Probability}
 6 | \usage{
 7 | prob_winner(post)
 8 | }
 9 | \arguments{
10 | \item{post}{Simulated results from the posterior, as provided by sim_post()}
11 | }
12 | \value{
13 | Probabilities each arm is the winner.
14 | }
15 | \description{
16 | Function to compute probability that each arm is the winner,
17 | given simulated posterior results.
18 | }
19 | \examples{
20 | 
21 | x <- c(10,20,30,50)
22 | n <- c(100,102,120,130)
23 | betaPost <- sim_post(x,n)
24 | pw <- prob_winner(betaPost)
25 | 
26 | }
27 | \author{
28 | Thomas Lotze and Markus Loecher
29 | }
30 | 


--------------------------------------------------------------------------------
/man/sample_one_of.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{sample_one_of}
 4 | \alias{sample_one_of}
 5 | \title{Sample one element from vector or list}
 6 | \usage{
 7 | sample_one_of(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A vector of one or more elements from which to choose}
11 | }
12 | \value{
13 | One value, drawn from x.
14 | }
15 | \description{
16 | Takes one sample from a vector or list. Does not throw an error for zero length lists.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/set_external.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{set_external}
 4 | \alias{set_external}
 5 | \title{Change Default Graphing Device from RStudio}
 6 | \usage{
 7 | set_external(ext = TRUE, width = 10, height = 6)
 8 | }
 9 | \arguments{
10 | \item{ext}{A \code{logical} indicating whether to plot in a popup or within the RStudio UI.}
11 | 
12 | \item{width}{Width in pixels of the popup window}
13 | 
14 | \item{height}{Height in pixels of the popup window}
15 | }
16 | \description{
17 | Checks to see if the user is in RStudio. If so, then it changes the device to a popup window.
18 | }
19 | \details{
20 | Depending on the operating system, the default drivers attempted to be used are:
21 | 
22 | OS X: quartz()
23 | 
24 | Linux: x11()
25 | 
26 | Windows: windows()
27 | 
28 | Note, this setting is not permanent. Thus, the behavioral change will last
29 | until the end of the session.
30 | 
31 | Also, the active graphing environment will be killed.
32 | As a result, any graphs that are open will be deleted.
33 | }
34 | \examples{
35 | \dontrun{
36 | 
37 | # Turn on external graphs
38 | external_graphs()
39 | 
40 | # Turn off external graphs
41 | external_graphs(F)
42 | }
43 | 
44 | }
45 | 


--------------------------------------------------------------------------------
/man/set_global_seed.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{set_global_seed}
 4 | \alias{set_global_seed}
 5 | \title{Set .Random.seed to a pre-saved value}
 6 | \usage{
 7 | set_global_seed(x)
 8 | }
 9 | \arguments{
10 | \item{x}{integer vector}
11 | }
12 | \description{
13 | Set .Random.seed to a pre-saved value
14 | }
15 | 


--------------------------------------------------------------------------------
/man/sherman_morrisson.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{sherman_morrisson}
 4 | \alias{sherman_morrisson}
 5 | \title{Sherman-Morrisson inverse}
 6 | \usage{
 7 | sherman_morrisson(inv, x)
 8 | }
 9 | \arguments{
10 | \item{inv}{to be updated inverse matrix}
11 | 
12 | \item{x}{column vector to update inv with}
13 | }
14 | \description{
15 | Sherman-Morrisson inverse
16 | }
17 | 


--------------------------------------------------------------------------------
/man/sim_post.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{sim_post}
 4 | \alias{sim_post}
 5 | \title{Binomial Posterior Simulator}
 6 | \usage{
 7 | sim_post(x, n, alpha = 1, beta = 1, ndraws = 5000)
 8 | }
 9 | \arguments{
10 | \item{x}{Vector of the number of successes per arm.}
11 | 
12 | \item{n}{Vector of the number of trials per arm.}
13 | 
14 | \item{alpha}{Shape parameter alpha for the prior beta distribution.}
15 | 
16 | \item{beta}{Shape parameter beta for the prior beta distribution.}
17 | 
18 | \item{ndraws}{Number of random draws from the posterior.}
19 | }
20 | \value{
21 | Matrix of bayesian probabilities for each arm being the best binomial bandit
22 | }
23 | \description{
24 | Simulates the posterior distribution of
25 | the Bayesian probabilities for each arm being the
26 | best binomial bandit.
27 | }
28 | \examples{
29 | 
30 | x <- c(10,20,30,50)
31 | n <- c(100,102,120,130)
32 | sp <- sim_post(x,n)
33 | 
34 | }
35 | \author{
36 | Thomas Lotze and Markus Loecher
37 | }
38 | 


--------------------------------------------------------------------------------
/man/sum_of.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{sum_of}
 4 | \alias{sum_of}
 5 | \title{Sum of list}
 6 | \usage{
 7 | sum_of(x)
 8 | }
 9 | \arguments{
10 | \item{x}{List}
11 | }
12 | \description{
13 | Returns the sum of the values of the elements of a list \code{x}.
14 | }
15 | \details{
16 | If there is a tie, and equal_is_random is TRUE,
17 | the index of one of the tied maxima is returned at random. Otherwise,
18 | the value with the lowest index is returned.
19 | }
20 | \examples{
21 | 
22 | theta = list(par_one = list(1,2,3), par_two = list(2,3,4))
23 | sum_of(theta$par_one)
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/man/summary.history.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_generic.R
 3 | \name{summary.history}
 4 | \alias{summary.history}
 5 | \alias{summary.History}
 6 | \title{Summary Method for Contextual History}
 7 | \usage{
 8 | \method{summary}{History}(object, ...)
 9 | }
10 | \arguments{
11 | \item{object}{A \code{History} object.}
12 | 
13 | \item{...}{Further summary parameters.}
14 | }
15 | \description{
16 | summary.history, a method for the summary generic. It is designed for a quick summary of History data.
17 | }
18 | \seealso{
19 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
20 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
21 | 
22 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
23 | \code{\link{OfflineReplayEvaluatorBandit}}
24 | 
25 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
26 | }
27 | 


--------------------------------------------------------------------------------
/man/value_remaining.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{value_remaining}
 4 | \alias{value_remaining}
 5 | \title{Potential Value Remaining}
 6 | \usage{
 7 | value_remaining(x, n, alpha = 1, beta = 1, ndraws = 10000)
 8 | }
 9 | \arguments{
10 | \item{x}{Vector of the number of successes per arm.}
11 | 
12 | \item{n}{Vector of the number of trials per arm.}
13 | 
14 | \item{alpha}{Shape parameter alpha for the prior beta distribution.}
15 | 
16 | \item{beta}{Shape parameter beta for the prior beta distribution.}
17 | 
18 | \item{ndraws}{Number of random draws from the posterior.}
19 | }
20 | \value{
21 | Value_remaining distribution; the distribution of
22 | improvement amounts that another arm might have over the current best arm.
23 | }
24 | \description{
25 | Compute "value_remaining" in arms not
26 | currently best in binomial bandits
27 | }
28 | \examples{
29 | 
30 | x <- c(10,20,30,80)
31 | n <- c(100,102,120,240)
32 | vr <- value_remaining(x, n)
33 | hist(vr)
34 | 
35 | # "potential value" remaining in the experiment
36 | potential_value <- quantile(vr, 0.95)
37 | 
38 | }
39 | \author{
40 | Thomas Lotze and Markus Loecher
41 | }
42 | 


--------------------------------------------------------------------------------
/man/var_welford.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{var_welford}
 4 | \alias{var_welford}
 5 | \title{Welford's variance}
 6 | \usage{
 7 | var_welford(z)
 8 | }
 9 | \arguments{
10 | \item{z}{vector}
11 | }
12 | \value{
13 | variance
14 | }
15 | \description{
16 | Welford described a method for 'robust' one-pass computation of the
17 | standard deviation. By 'robust', we mean robust to round-off caused
18 | by a large shift in the mean.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/which_max_list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{which_max_list}
 4 | \alias{which_max_list}
 5 | \title{Get maximum value in list}
 6 | \usage{
 7 | which_max_list(x, equal_is_random = TRUE)
 8 | }
 9 | \arguments{
10 | \item{x}{vector of values}
11 | 
12 | \item{equal_is_random}{boolean}
13 | }
14 | \description{
15 | Returns the index of the maximum value in list \code{x}.
16 | }
17 | \details{
18 | If there is a tie and \code{equal_is_random} is \code{TRUE},
19 | the index of one of the tied maxima is returned at random.
20 | 
21 | If \code{equal_is_random} is \code{FALSE},
22 | the maximum with the lowest index number is returned.
23 | }
24 | \examples{
25 | 
26 | theta = list(par_one = list(1,2,3), par_two = list(2,3,4))
27 | which_max_list(theta$par_one)
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/man/which_max_tied.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/functions_utility.R
 3 | \name{which_max_tied}
 4 | \alias{which_max_tied}
 5 | \title{Get maximum value randomly breaking ties}
 6 | \usage{
 7 | which_max_tied(x, equal_is_random = TRUE)
 8 | }
 9 | \arguments{
10 | \item{x}{vector of values}
11 | 
12 | \item{equal_is_random}{boolean}
13 | }
14 | \description{
15 | Returns the index of the maximum value in vector \code{vec}.
16 | }
17 | \details{
18 | If there is a tie, and equal_is_random is TRUE,
19 | the index of one of the tied maxima is returned at random. Otherwise,
20 | the value with the lowest index is returned.
21 | }
22 | 


--------------------------------------------------------------------------------
/tests/figs/deps.txt:
--------------------------------------------------------------------------------
1 | - vdiffr-svg-engine: 1.0
2 | - vdiffr: 0.3.2.2
3 | - freetypeharfbuzz: 0.2.5
4 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | Sys.setenv("R_TESTS" = "")
2 | 
3 | library(testthat)
4 | library(contextual)
5 | 
6 | test_check("contextual")
7 | 


--------------------------------------------------------------------------------
/tests/testthat/setup_tests.R:
--------------------------------------------------------------------------------
1 | suppressWarnings(RNGversion("3.5.0"))


--------------------------------------------------------------------------------
/tests/testthat/teardown_tests.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | delete_files <- dir(path=".", pattern="*.csv")
 4 | file.remove(delete_files)
 5 | 
 6 | delete_files <- dir(path=".", pattern="*.pdf")
 7 | file.remove(delete_files)
 8 | 
 9 | delete_files <- dir(path=".", pattern="*.svg")
10 | file.remove(delete_files)
11 | 
12 | delete_files <- dir(path=".", pattern="*.log")
13 | file.remove(delete_files)
14 | 


--------------------------------------------------------------------------------
/tests/testthat/test_agent.R:
--------------------------------------------------------------------------------
 1 | context("Agent")
 2 | 
 3 | test_that("Agent", {
 4 | 
 5 |   policy             <- EpsilonGreedyPolicy$new(epsilon = 0.1)
 6 |   expect_identical(typeof(policy), "environment")
 7 | 
 8 |   bandit             <- BasicBernoulliBandit$new(c(0.6, 0.1, 0.1))
 9 |   expect_identical(typeof(bandit), "environment")
10 | 
11 |   agent              <- Agent$new(policy, bandit, name = "testme", sparse = 0.5)
12 |   expect_identical(typeof(agent), "environment")
13 |   expect_equal(agent$name, "testme")
14 |   expect_equal(agent$sparse, 0.5)
15 |   expect_equal(agent$bandit$d, 1)
16 |   expect_equal(agent$bandit$k, 3)
17 |   expect_equal(agent$policy$class_name, "EpsilonGreedyPolicy")
18 |   expect_equal(agent$policy$epsilon, 0.1)
19 |   expect_equal(agent$policy$theta$mean[[1]], 0)
20 | 
21 |   history            <- Simulator$new(agents = agent,
22 |                                       horizon = 10,
23 |                                       simulations = 10,
24 |                                       do_parallel = FALSE,
25 |                                       log_interval = 1,
26 |                                       progress_file = TRUE)$run()
27 | 
28 |   expect_identical(history$cumulative$testme$reward,0.4)
29 | 
30 |   t                  <- agent$get_t()
31 |   agent$set_t(t+1)
32 |   t                  <- agent$get_t()
33 | 
34 |   expect_identical(agent$get_t(),1)
35 | 
36 |   Sys.sleep(0.1)
37 |   expect_true(file.exists("parallel.log"))
38 |   expect_true(file.exists("workers_progress.log"))
39 |   expect_true(file.exists("agents_progress.log"))
40 |   if (file.exists("workers_progress.log")) file.remove("workers_progress.log")
41 |   if (file.exists("agents_progress.log")) file.remove("agents_progress.log")
42 |   if (file.exists("progress.log")) file.remove("progress.log")
43 | 
44 | })
45 | 


--------------------------------------------------------------------------------
/tests/testthat/test_policies.R:
--------------------------------------------------------------------------------
 1 | context("Policies")
 2 | 
 3 | test_that("ContextualLogitBTSPolicy simulation", {
 4 | 
 5 |   horizon       <- 20L
 6 |   simulations   <- 10L
 7 | 
 8 |   bandit        <- ContextualLinearBandit$new(k = 5, d = 5, binary_rewards = TRUE)
 9 | 
10 |   agents        <-list(
11 |     Agent$new(ContextualLogitBTSPolicy$new(), bandit)
12 |   )
13 | 
14 |   simulation     <- Simulator$new(agents, horizon, simulations, do_parallel = FALSE)
15 |   history        <- simulation$run()
16 | 
17 |   expect_equal(history$cumulative$ContextualLogitBTS$cum_reward,  6.2,  tolerance = 0.01)
18 |   expect_equal(history$cumulative$ContextualLogitBTS$cum_regret,  11.6, tolerance = 0.01)
19 | 
20 | })
21 | 


--------------------------------------------------------------------------------
/tests/testthat/test_policy.R:
--------------------------------------------------------------------------------
 1 | context("Policy")
 2 | 
 3 | test_that("Policy", {
 4 | 
 5 |   policy      <- Policy$new()
 6 |   expect_identical(typeof(policy), "environment")
 7 | 
 8 |   policy$theta_to_arms <- list(n=3)
 9 |   theta <- policy$initialize_theta(4)
10 |   expect_identical(theta$n[[4]], 3)
11 | 
12 |   expect_identical(policy$class_name, "Policy")
13 |   expect_error(policy$get_action(), "has not been implemented")
14 |   expect_error(policy$set_reward(), "has not been implemented")
15 | 
16 | })
17 | 


--------------------------------------------------------------------------------
/tests/testthat/test_utility_functions.R:
--------------------------------------------------------------------------------
 1 | context("Utility Functions")
 2 | 
 3 | test_that("Inc and Dec", {
 4 | 
 5 |   x <- 1:5
 6 |   inc(x) <- 5
 7 |   expect_equal(6:10, x)
 8 | 
 9 |   dec(x) <- 5
10 |   expect_equal(1:5, x)
11 | 
12 | })
13 | 
14 | test_that("which_max_list", {
15 |   set.seed(1)
16 | 
17 |   theta = list(par_one = list(1,2,2), par_two = list(2,3,4))
18 |   expect_equal(which_max_list(theta$par_one, equal_is_random = TRUE), 2)
19 | 
20 |   theta = list(par_one = list(1,2,2), par_two = list(2,3,4))
21 |   expect_equal(which_max_list(theta$par_one, equal_is_random = FALSE), 2)
22 | })
23 | 
24 | 
25 | test_that("Welford", {
26 |   set.seed(42)
27 |   v <- sample(20)
28 |   s <- var_welford(v)
29 |   expect_equal(s,35)
30 | })
31 | 
32 | test_that("Formatted difftime", {
33 |   ft <- formatted_difftime(difftime(strftime ("2019-10-18 13:35:35 CEST"),
34 |                                     strftime ("2018-09-17 12:31:30 CEST")))
35 |   expect_equal(ft,"396 days, 1:04:05")
36 | })
37 | 
38 | test_that("Inverse Logit", {
39 |   expect_equal(invlogit(10),0.9999546, tolerance = 0.002)
40 | })
41 | 
42 | test_that("InvGamma", {
43 | 
44 |   s <- seq(0, 5, .01)
45 |   x <- dinvgamma(s, 7, 10)
46 |   x2 <- dinvgamma(s, 7, scale = 0.10)
47 | 
48 |   expect_equal_to_reference(x, file = "dinvgamma1.rds")
49 |   expect_equal_to_reference(x2, file = "dinvgamma2.rds")
50 | 
51 |   x2 <- dinvgamma(s, 7, 10, log = TRUE)
52 |   expect_equal_to_reference(x2, file = "logdiv.rds")
53 | 
54 |   q <- 2
55 |   (p <- pinvgamma(q, 7, 10))
56 |   expect_equal(qinvgamma(p, 7, 10), q)
57 | 
58 |   q <- 2
59 |   (p <- pinvgamma(q, 7, scale = 0.10))
60 |   expect_equal(qinvgamma(p, 7, scale = 0.10), q)
61 | 
62 |   expect_equal(mean(rinvgamma(1e5, 7, 10) <= q),0.76088, tolerance = 0.002)
63 | 
64 |   expect_equal(mean(rinvgamma(1e5, 7, scale = 0.10) <= q),0.763, tolerance = 0.02)
65 | 
66 | })
67 | 
68 | 


--------------------------------------------------------------------------------
/vignettes/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/1.png


--------------------------------------------------------------------------------
/vignettes/Rplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/Rplot.png


--------------------------------------------------------------------------------
/vignettes/basic_epsilon_greedy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/basic_epsilon_greedy.png


--------------------------------------------------------------------------------
/vignettes/carskit_depaul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/carskit_depaul.png


--------------------------------------------------------------------------------
/vignettes/cmabs.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/cmabs.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Demo: Basic Synthetic cMAB Policies"
 3 | author: "Robin van Emden"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Demo: Basic Synthetic cMAB Policies}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ```{r setup, include = FALSE, cache = TRUE}
13 | knitr::opts_chunk$set(
14 |   collapse = TRUE,
15 |   comment = "#>"
16 | )
17 | ```
18 | 
19 | ```r
20 | library(contextual)
21 | 
22 | horizon       <- 100L
23 | simulations   <- 100L
24 | 
25 | bandit        <- ContextualLinearBandit$new(k = 4, d = 3, sigma = 0.3)
26 | 
27 | # Linear CMAB policies comparison
28 | 
29 | agents <- list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit, "EGreedy"),
30 |                Agent$new(ContextualEpsilonGreedyPolicy$new(0.1), bandit, "cEGreedy"),
31 |                Agent$new(ContextualLinTSPolicy$new(0.1), bandit, "LinTS"),
32 |                Agent$new(LinUCBDisjointOptimizedPolicy$new(0.6), bandit, "LinUCB"))
33 | 
34 | simulation     <- Simulator$new(agents, horizon, simulations, do_parallel = TRUE)
35 | 
36 | history        <- simulation$run()
37 | 
38 | plot(history, type = "cumulative", rate = FALSE, legend_position = "topleft")
39 | 
40 | ```
41 | 
42 | ![](cmabs.png)
43 | 


--------------------------------------------------------------------------------
/vignettes/cmabs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/cmabs.png


--------------------------------------------------------------------------------
/vignettes/cmabsoffline.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/cmabsoffline.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Demo: Offline cMAB LinUCB evaluation"
 3 | author: "Robin van Emden"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Demo: Offline cMAB LinUCB evaluation}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ```{r setup, include = FALSE, cache = TRUE}
13 | knitr::opts_chunk$set(
14 |   collapse = TRUE,
15 |   comment = "#>"
16 | )
17 | ```
18 | 
19 | ```r
20 | library(contextual)
21 | library(data.table)
22 | 
23 | # Import personalization data-set
24 | 
25 | library(contextual); library(data.table)
26 | 
27 | dt      <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/data.txt") 
28 |                                     # 0/1 reward, 10 arms, 100 features
29 |                                     # arms always start from 1
30 | 
31 | #      z y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15  .. x100
32 | #   1: 2 0  5  0  0 37  6  0  0  0  0  25   0   0   7   1   0  ..    0
33 | #   2: 8 0  1  3 36  0  0  0  0  0  0   0   0   1   0   0   0  ..   10
34 | #   3: . .  .  .  .  .  .  .  .  .  .   .   .   .   .   .   .  ..    .
35 | 
36 | horizon     <- nrow(dt)
37 | simulations <- 1
38 | 
39 | # Set up formula:       y      ~ z    | x1 + x2 + ..
40 | # In bandit parlance:   reward ~ arms | covariates or contextual features
41 | 
42 | f       <- y ~ z | . - z
43 | 
44 | # Instantiate Replay Bandit (Li, 2010)
45 | bandit  <- OfflineReplayEvaluatorBandit$new(formula = f, data = dt)
46 | 
47 | # Bind Policies withs Bandits through Agents, add Agents to list
48 | agents  <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.01), bandit, "alpha = 0.01"),
49 |                 Agent$new(LinUCBDisjointOptimizedPolicy$new(0.05), bandit, "alpha = 0.05"),
50 |                 Agent$new(LinUCBDisjointOptimizedPolicy$new(0.1),  bandit, "alpha = 0.1"),
51 |                 Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0),  bandit, "alpha = 1.0"))
52 | 
53 | # Instantiate a Simulator
54 | simulation <- Simulator$new(agents, horizon = nrow(dt), simulations = 1)
55 | 
56 | # Run the simulation.
57 | history    <- simulation$run()
58 | 
59 | # plot the results
60 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, 
61 |      legend_position = "bottomright", ylim = c(0,1))
62 | ```
63 | 
64 | ![](linucboffline.png)
65 | 


--------------------------------------------------------------------------------
/vignettes/compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/compare.png


--------------------------------------------------------------------------------
/vignettes/contextual-fig-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/contextual-fig-1.png


--------------------------------------------------------------------------------
/vignettes/contextual-fig-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/contextual-fig-2.png


--------------------------------------------------------------------------------
/vignettes/eckles_kaptein.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/eckles_kaptein.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Demo: MAB Replication Eckles & Kaptein (Bootstrap Thompson Sampling)"
 3 | author: "Robin van Emden"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Demo: MAB Replication Eckles & Kaptein (Bootstrap Thompson Sampling)}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ```{r setup, include = FALSE, cache = TRUE}
13 | knitr::opts_chunk$set(
14 |   collapse = TRUE,
15 |   comment = "#>"
16 | )
17 | ```
18 | 
19 | ```r
20 | library(contextual)
21 | 
22 | # Replication of THOMPSON SAMPLING WITH THE ONLINE BOOTSTRAP By Dean Eckles and Maurits Kaptein
23 | 
24 | # This evaluations takes time - up to a few hours when run single core.
25 | 
26 | # Running the script in parallel (for example, on 8 cores) 
27 | # shortens the evaluation time substantially.
28 | 
29 | # https://arxiv.org/abs/1410.4009
30 | 
31 | # Fig 2. Empirical regret for Thompson sampling and BTS in a K-armed binomial bandit problem.
32 | 
33 | bandit             <- BasicBernoulliBandit$new(weights = c(0.5, rep(0.4,9)))
34 | 
35 | agents             <- list(Agent$new(BootstrapTSPolicy$new(1000), bandit, "BTS 1000"),
36 |                            Agent$new(ThompsonSamplingPolicy$new(), bandit, "TS"))
37 | 
38 | simulator          <- Simulator$new(agents        = agents,
39 |                                     do_parallel   = TRUE,
40 |                                     save_interval = 50,
41 |                                     set_seed      = 999,
42 |                                     horizon       = 1e+05,
43 |                                     simulations   = 1000)
44 | 
45 | simulator$run()
46 | 
47 | plot(simulator$history, log = "x")
48 | 
49 | ```
50 | 
51 | ![](eckles_kaptein_1.png)
52 | 


--------------------------------------------------------------------------------
/vignettes/eckles_kaptein_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eckles_kaptein_0.png


--------------------------------------------------------------------------------
/vignettes/eckles_kaptein_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eckles_kaptein_1.png


--------------------------------------------------------------------------------
/vignettes/eg_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eg_average_reward.png


--------------------------------------------------------------------------------
/vignettes/eg_cumulative_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eg_cumulative_reward.png


--------------------------------------------------------------------------------
/vignettes/eg_incorrect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eg_incorrect.png


--------------------------------------------------------------------------------
/vignettes/eg_optimal_action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eg_optimal_action.png


--------------------------------------------------------------------------------
/vignettes/epsilongreedy.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/epsilongreedy.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Demo: Basic Epsilon Greedy"
 3 | author: "Robin van Emden"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Demo: Basic Epsilon Greed}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ```{r setup, include = FALSE, cache = TRUE}
13 | knitr::opts_chunk$set(
14 |   collapse = TRUE,
15 |   comment = "#>"
16 | )
17 | ```
18 | 
19 | ```r
20 | library(contextual)
21 | 
22 | policy             <- EpsilonGreedyPolicy$new(epsilon = 0.1)
23 | 
24 | bandit             <- BasicBernoulliBandit$new(weights = c(0.6, 0.1, 0.1))
25 | agent              <- Agent$new(policy,bandit)
26 | 
27 | simulator          <- Simulator$new(agents      = agent,
28 |                                     horizon     = 100,
29 |                                     simulations = 1000)
30 |                                     
31 | history            <- simulator$run()
32 | 
33 | plot(history, type = "cumulative", regret = TRUE, disp = "ci",
34 |               traces_max = 100, traces_alpha = 0.1, traces = TRUE)
35 | ```
36 | 
37 | ![](basic_epsilon_greedy.png)
38 | 
39 | ```r
40 | summary(history)
41 |                                   
42 | ```
43 | 
44 | ```r
45 | Agents:
46 | 
47 |   EpsilonGreedy
48 | 
49 | Cumulative regret:
50 | 
51 |          agent   t sims cum_regret cum_regret_var cum_regret_sd
52 |  EpsilonGreedy 100 1000      8.951       116.7133      10.80339
53 | 
54 | 
55 | Cumulative reward:
56 | 
57 |          agent   t sims cum_reward cum_reward_var cum_reward_sd
58 |  EpsilonGreedy 100 1000      51.09       141.6215      11.90048
59 | 
60 | 
61 | Cumulative reward rate:
62 | 
63 |          agent   t sims cur_reward cur_reward_var cur_reward_sd
64 |  EpsilonGreedy 100 1000     0.5109       1.416215     0.1190048
65 | ```
66 | 


--------------------------------------------------------------------------------
/vignettes/introduction.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE-------------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/linucboffline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/linucboffline.png


--------------------------------------------------------------------------------
/vignettes/mabs.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Demo: MAB Policies Comparison"
 3 | author: "Robin van Emden"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Demo: MAB Policies Comparison}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ```{r setup, include = FALSE, cache = TRUE}
13 | knitr::opts_chunk$set(
14 |   collapse = TRUE,
15 |   comment = "#>"
16 | )
17 | ```
18 | 
19 | ```r
20 | library(contextual)
21 | 
22 | prob_per_arm       <- c(0.9, 0.1, 0.1)
23 | horizon            <- 100
24 | simulations        <- 1000
25 | 
26 | bandit             <- BasicBernoulliBandit$new(prob_per_arm)
27 | 
28 | agents             <- list(Agent$new(OraclePolicy$new(), bandit),
29 |                            Agent$new(EpsilonGreedyPolicy$new(0.1), bandit),
30 |                            Agent$new(ThompsonSamplingPolicy$new(1.0, 1.0), bandit),
31 |                            Agent$new(Exp3Policy$new(0.1), bandit),
32 |                            Agent$new(GittinsBrezziLaiPolicy$new(), bandit),
33 |                            Agent$new(UCB1Policy$new(), bandit),
34 |                            Agent$new(UCB2Policy$new(0.1), bandit))
35 | 
36 | simulation         <- Simulator$new(agents, horizon, simulations)
37 | history            <- simulation$run()
38 | 
39 | plot(history, type = "cumulative")
40 | ```
41 | 
42 | ![](mabs.png)
43 | 


--------------------------------------------------------------------------------
/vignettes/mabs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/mabs.png


--------------------------------------------------------------------------------
/vignettes/ml10m.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/ml10m.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/ml10m.png


--------------------------------------------------------------------------------
/vignettes/offline_depaul_movies.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/offline_depaul_movies.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Demo: Offline cMAB: CarsKit DePaul Movie Dataset"
 3 | author: "Robin van Emden"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Demo: Offline cMAB: CarsKit DePaul Movie Dataset}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ```{r setup, include = FALSE, cache = TRUE}
13 | knitr::opts_chunk$set(
14 |   collapse = TRUE,
15 |   comment = "#>"
16 | )
17 | ```
18 | 
19 | ```r
20 | library(contextual)
21 | library(data.table)
22 | 
23 | # Import personalization data-set
24 | 
25 | # Info: https://d1ie9wlkzugsxr.cloudfront.net/data_irecsys_CARSKit/Movie_DePaulMovie/README.txt
26 | 
27 | url         <- "http://d1ie9wlkzugsxr.cloudfront.net/data_irecsys_CARSKit/Movie_DePaulMovie/ratings.csv"
28 | data        <- fread(url, stringsAsFactors=TRUE)
29 | 
30 | # Convert data
31 | 
32 | data        <- contextual::one_hot(data, cols = c("Time","Location","Companion"), sparsifyNAs = TRUE)
33 | data[, itemid := as.numeric(itemid)]
34 | data[, rating := ifelse(rating <= 3, 0, 1)]
35 | 
36 | # Set simulation parameters.
37 | simulations <- 10  # here, "simulations" represents the number of boostrap samples
38 | horizon     <- nrow(data)
39 | 
40 | # Initiate Replay bandit with 10 arms and 100 context dimensions
41 | # Arms always start with 1 for the first arm
42 | log_S       <- data
43 | formula     <- formula("rating ~ itemid | Time_Weekday + Time_Weekend + Location_Cinema + 
44 |                                           Location_Home + Companion_Alone + Companion_Family + 
45 |                                           Companion_Partner")
46 | bandit      <- OfflineBootstrappedReplayBandit$new(formula = formula, data = data)
47 | 
48 | # Define agents.
49 | agents      <-
50 |   list(Agent$new(RandomPolicy$new(), bandit, "Random"),
51 |        Agent$new(EpsilonGreedyPolicy$new(0.03), bandit, "EGreedy 0.05"),
52 |        Agent$new(ThompsonSamplingPolicy$new(), bandit, "ThompsonSampling"),
53 |        Agent$new(LinUCBDisjointOptimizedPolicy$new(0.37), bandit, "LinUCB 0.37"))
54 | 
55 | # Initialize the simulation.
56 | simulation  <-
57 |   Simulator$new(
58 |     agents           = agents,
59 |     simulations      = simulations,
60 |     horizon          = horizon
61 |   )
62 | 
63 | # Run the simulation.
64 | # Takes +- 5 minutes: bootstrapbandit loops through arms x horizon x simulations (times nr of agents).
65 | sim  <- simulation$run()
66 | 
67 | # plot the results
68 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE,
69 |          legend_position = "topleft", ylim=c(0.48,0.87))
70 | 
71 | ```
72 | 
73 | ![](carskit_depaul.png)
74 | 


--------------------------------------------------------------------------------
/vignettes/replication-fig-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/replication-fig-1.png


--------------------------------------------------------------------------------
/vignettes/replication-fig-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/replication-fig-2.png


--------------------------------------------------------------------------------
/vignettes/replication.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/simpsons.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE-------------------------------------
2 | knitr::opts_chunk$set(
3 |   collapse = TRUE,
4 |   comment = "#>"
5 | )
6 | 
7 | 


--------------------------------------------------------------------------------
/vignettes/softmax_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/softmax_average_reward.png


--------------------------------------------------------------------------------
/vignettes/softmax_cumulative_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/softmax_cumulative_reward.png


--------------------------------------------------------------------------------
/vignettes/softmax_optimal_action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/softmax_optimal_action.png


--------------------------------------------------------------------------------
/vignettes/sutton_barto.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   fig.pos = 'H',
4 |   collapse = TRUE,
5 |   comment = "#>"
6 | )
7 | 
8 | 


--------------------------------------------------------------------------------
/vignettes/sutton_eg_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_eg_1.png


--------------------------------------------------------------------------------
/vignettes/sutton_eg_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_eg_2.png


--------------------------------------------------------------------------------
/vignettes/sutton_gradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_gradient.png


--------------------------------------------------------------------------------
/vignettes/sutton_optimistic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_optimistic.png


--------------------------------------------------------------------------------
/vignettes/sutton_ucb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_ucb.png


--------------------------------------------------------------------------------
/vignettes/sutton_violin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_violin.png


--------------------------------------------------------------------------------
/vignettes/ucb_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/ucb_average_reward.png


--------------------------------------------------------------------------------
/vignettes/ucb_cumulative_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/ucb_cumulative_reward.png


--------------------------------------------------------------------------------
/vignettes/ucb_optimal_action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/ucb_optimal_action.png


--------------------------------------------------------------------------------
/vignettes/website_optimization.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include = FALSE, cache = TRUE--------------------------------
2 | knitr::opts_chunk$set(
3 |   fig.pos = 'H',
4 |   collapse = TRUE,
5 |   comment = "#>"
6 | )
7 | 
8 | 


--------------------------------------------------------------------------------