├── .Rbuildignore ├── .gitattributes ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── agent.R ├── bandit.R ├── bandit_basic_bernoulli.R ├── bandit_basic_gaussian.R ├── bandit_cmab_bernoulli.R ├── bandit_cmab_binary.R ├── bandit_cmab_hybrid.R ├── bandit_cmab_linear.R ├── bandit_cmab_logit.R ├── bandit_cmab_precaching.R ├── bandit_cmab_wheel.R ├── bandit_continuum_function.R ├── bandit_offline_bootstrapped_replay.R ├── bandit_offline_direct_method.R ├── bandit_offline_doubly_robust.R ├── bandit_offline_propensity_weighting.R ├── bandit_offline_replay_evaluator.R ├── bandit_offline_replay_evaluator_lookup.R ├── functions_generic.R ├── functions_utility.R ├── history.R ├── plot.R ├── policy.R ├── policy_cmab_lin_epoch_greedy.R ├── policy_cmab_lin_epsilon_greedy.R ├── policy_cmab_lin_ts_disjoint.R ├── policy_cmab_lin_ucb_disjoint.R ├── policy_cmab_lin_ucb_disjoint_optimized.R ├── policy_cmab_lin_ucb_general.R ├── policy_cmab_lin_ucb_hybrid.R ├── policy_cmab_lin_ucb_hybrid_optimized.R ├── policy_cmab_logit_ts_bootstrap.R ├── policy_cmab_probit_ts.R ├── policy_cont_lif.R ├── policy_fixed.R ├── policy_mab_epsilon_first.R ├── policy_mab_epsilon_greedy.R ├── policy_mab_exp3.R ├── policy_mab_gittins_bl.R ├── policy_mab_gradient.R ├── policy_mab_softmax.R ├── policy_mab_ts.R ├── policy_mab_ts_bootstrap.R ├── policy_mab_ucb1.R ├── policy_mab_ucb2.R ├── policy_oracle.R ├── policy_random.R └── simulator.R ├── README.md ├── _pkgdown.yml ├── appveyor.yml ├── codecov.yml ├── contextual.Rproj ├── cran-comments.md ├── demo ├── 00Index ├── alternative_parallel_backends │ ├── azure │ │ ├── cluster.json │ │ ├── credentials-sample.json │ │ ├── demo_azure.R │ │ ├── simulator_azure.R │ │ └── test_azure_connection.R │ ├── redis │ │ ├── demo_redis.R │ │ └── simulator_redis.R │ └── rmpi │ │ ├── demo_rmpi.R │ │ ├── simulator_rmpi.R │ │ └── test_rmpi_connection.R ├── demo_bandit_algorithms_for_website_optimization.R ├── demo_cmab_policy_comparison_linear_bandit.R ├── demo_cmab_policy_comparison_weight_bandit.R ├── demo_epsilon_greedy_policy.R ├── demo_epsilon_greedy_to_epoch_greedy_policy.R ├── demo_lif_bandit.R ├── demo_mab_policy_comparison.R ├── demo_offline_cmab_alpha_linucb_direct_method.R ├── demo_offline_cmab_alpha_linucb_replay.R ├── demo_simpsons_paradox_propensity.R ├── demo_sine_bandit.R ├── demo_subsubclass.R ├── demo_sutton_barto.R ├── evaluations_on_public_datasets │ ├── demo_carskit_depaul.R │ ├── demo_movielens_100k.R │ └── demo_movielens_10m.R ├── offline_bandit_evaluations │ ├── demo_offline_bootstrap_replay.R │ ├── demo_offline_direct_method.R │ ├── demo_offline_doubly_robust.R │ └── demo_offline_propensity_score.R ├── replication_eckles_kaptein_2014 │ ├── demo_bootstrap_fig_2.R │ └── demo_bootstrap_fig_3.R ├── replication_kruijswijk_2018 │ ├── 1_basic_synthetic_evaluation.R │ ├── 2a_main_synthetic_evaluation.R │ ├── 2b_dependent_observations_plot_bar.R │ ├── 3_offline_bootstrapped_persuasion.R │ ├── README.md │ ├── bandit_bernoulli.R │ ├── bandit_bootstrapped_replay.R │ ├── bandit_replay.R │ ├── beta_binom_hier_model.stan │ ├── policy_pooled_egreedy.R │ ├── policy_pooled_thompson.R │ └── policy_pooled_ucb.R ├── replication_kruijswijk_2019 │ ├── README.md │ ├── bandit_continuum_function_bimodal.R │ ├── bandit_continuum_function_unimodal.R │ ├── bandit_continuum_offon.R │ ├── bandit_continuum_offon_kern.R │ ├── demo_lif_bandit.R │ ├── demo_tbl_bandit.R │ ├── policy_cont_lif_randstart.R │ └── policy_tbl.R ├── replication_li_2010 │ ├── 1_import_yahoo_to_monetdb.R │ ├── 
2_run_simulation.R │ ├── 3_plotter.R │ ├── 4_plotter.R │ ├── alternative_db_scripts │ │ ├── 1_import_yahoo_data_to_monetdb_lite.R │ │ ├── 2_run_the_simulation_on_monetdb.R │ │ ├── 2_run_the_simulation_on_monetdb_lite.R │ │ ├── yahoo_to_mysql.R │ │ ├── yahoo_to_postgresql.R │ │ └── yahoo_to_sqlite.R │ ├── demo_yahoo_classes │ │ ├── yahoo_bandit.R │ │ ├── yahoo_policy_epsilon_greedy.R │ │ ├── yahoo_policy_epsilon_greedy_seg.R │ │ ├── yahoo_policy_linucb_disjoint.R │ │ ├── yahoo_policy_linucb_hybrid.R │ │ ├── yahoo_policy_random.R │ │ ├── yahoo_policy_ucb1_alpha.R │ │ └── yahoo_policy_ucb1_alpha_seg.R │ └── demo_yahoo_exploration │ │ ├── exploration.R │ │ └── plots.R └── replication_van_emden_2018 │ ├── section_2_3.R │ ├── section_3_2_1.R │ ├── section_3_2_2.R │ ├── section_4_2_plot.R │ ├── section_5_2.R │ ├── section_5_3.R │ ├── section_5_4.R │ ├── section_6.R │ ├── section_7.R │ └── section_8.R ├── docs ├── 404.html ├── LICENSE-text.html ├── LICENSE.html ├── README.html ├── articles │ ├── _only_pkgdown_ │ │ └── faq.html │ ├── arxiv_2018 │ │ ├── fig │ │ │ ├── all_cmab_phases_Part1.pdf │ │ │ ├── all_cmab_phases_Part2.pdf │ │ │ ├── all_cmab_phases_Part3.pdf │ │ │ ├── all_cmab_phases_Part4.pdf │ │ │ ├── all_cmab_phases_Part5.pdf │ │ │ ├── all_cmab_phases_Part6.pdf │ │ │ ├── all_cmab_phases_Part7.pdf │ │ │ ├── all_cmab_phases_Part8.pdf │ │ │ ├── cmab_chart.pdf │ │ │ ├── contextual_class.pdf │ │ │ ├── contextual_sequence.pdf │ │ │ ├── offline_bandit.pdf │ │ │ ├── section_2_3.pdf │ │ │ ├── section_3_2_1.pdf │ │ │ ├── section_3_2_2.pdf │ │ │ ├── section_4_2_plot.pdf │ │ │ ├── section_5_2.pdf │ │ │ ├── section_5_3.pdf │ │ │ ├── section_5_4.pdf │ │ │ ├── section_5_5.pdf │ │ │ ├── section_8_bar.pdf │ │ │ └── section_8_plot.pdf │ │ ├── jss.aux │ │ ├── jss.bbl │ │ ├── jss.bst │ │ ├── jss.cls │ │ ├── jss.out │ │ ├── jss.pdf │ │ ├── jss.synctex.gz │ │ └── jsslogo.jpg │ ├── bandit_algorithms_for_website_optimization.html │ ├── basic_epsilon_greedy.jpeg │ ├── basic_epsilon_greedy.jpg │ ├── basic_epsilon_greedy.png │ ├── carskit_depaul.jpeg │ ├── carskit_depaul.jpg │ ├── carskit_depaul.png │ ├── cmabs.html │ ├── cmabs.jpeg │ ├── cmabs.jpg │ ├── cmabs.png │ ├── cmabsoffline.html │ ├── compare.png │ ├── contextual-fig-1.jpg │ ├── contextual-fig-1.png │ ├── contextual-fig-2.jpg │ ├── contextual-fig-2.png │ ├── eckles_kaptein.html │ ├── eckles_kaptein_1.jpg │ ├── eckles_kaptein_1.png │ ├── eg_average_reward.jpeg │ ├── eg_average_reward.jpg │ ├── eg_average_reward.png │ ├── eg_cumulative_reward.jpeg │ ├── eg_cumulative_reward.jpg │ ├── eg_cumulative_reward.png │ ├── eg_incorrect.jpeg │ ├── eg_incorrect.jpg │ ├── eg_incorrect.png │ ├── eg_optimal_action.jpeg │ ├── eg_optimal_action.jpg │ ├── eg_optimal_action.png │ ├── epsilongreedy.html │ ├── index.html │ ├── introduction.html │ ├── linucboffline.jpeg │ ├── linucboffline.jpg │ ├── linucboffline.png │ ├── mabs.html │ ├── mabs.jpeg │ ├── mabs.jpg │ ├── mabs.png │ ├── ml10m.html │ ├── ml10m.jpg │ ├── ml10m.png │ ├── offline_depaul_movies.html │ ├── only_pkgdown │ │ └── faq.html │ ├── replication-fig-1.jpg │ ├── replication-fig-1.png │ ├── replication-fig-2.jpg │ ├── replication-fig-2.png │ ├── replication.html │ ├── simpsons.html │ ├── softmax_average_reward.jpeg │ ├── softmax_average_reward.jpg │ ├── softmax_average_reward.png │ ├── softmax_cumulative_reward.jpeg │ ├── softmax_cumulative_reward.jpg │ ├── softmax_cumulative_reward.png │ ├── softmax_optimal_action.jpeg │ ├── softmax_optimal_action.jpg │ ├── softmax_optimal_action.png │ ├── sutton_barto.html │ ├── 
sutton_eg_1.jpeg │ ├── sutton_eg_1.jpg │ ├── sutton_eg_1.png │ ├── sutton_eg_2.jpeg │ ├── sutton_eg_2.jpg │ ├── sutton_eg_2.png │ ├── sutton_gradient.jpeg │ ├── sutton_gradient.jpg │ ├── sutton_gradient.png │ ├── sutton_optimistic.jpeg │ ├── sutton_optimistic.jpg │ ├── sutton_optimistic.png │ ├── sutton_ucb.jpeg │ ├── sutton_ucb.jpg │ ├── sutton_ucb.png │ ├── sutton_violin.jpeg │ ├── sutton_violin.jpg │ ├── sutton_violin.png │ ├── ucb_average_reward.jpeg │ ├── ucb_average_reward.jpg │ ├── ucb_average_reward.png │ ├── ucb_cumulative_reward.jpeg │ ├── ucb_cumulative_reward.jpg │ ├── ucb_cumulative_reward.png │ ├── ucb_optimal_action.jpeg │ ├── ucb_optimal_action.jpg │ ├── ucb_optimal_action.png │ └── website_optimization.html ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── docsearch.json ├── favicon.ico ├── index.html ├── jquery.sticky-kit.min.js ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── Agent.html │ ├── Bandit.html │ ├── BasicBernoulliBandit.html │ ├── BasicGaussianBandit.html │ ├── BootstrapTSPolicy.html │ ├── ContextualBasicBandit.html │ ├── ContextualBernoulliBandit.html │ ├── ContextualBernoulliPrecachingBandit.html │ ├── ContextualBinaryBandit.html │ ├── ContextualEpochGreedyPolicy.html │ ├── ContextualEpsilonGreedy.html │ ├── ContextualEpsilonGreedyPolicy.html │ ├── ContextualHybridBandit.html │ ├── ContextualLinTSPolicy.html │ ├── ContextualLinearBandit.html │ ├── ContextualLogitBTSPolicy.html │ ├── ContextualLogitBandit.html │ ├── ContextualPrecachingBandit.html │ ├── ContextualTSProbitPolicy.html │ ├── ContextualThompsonSamplingPolicy.html │ ├── ContextualWheelBandit.html │ ├── ContinuumBandit.html │ ├── EpsilonFirstPolicy-1.png │ ├── EpsilonFirstPolicy-2.png │ ├── EpsilonFirstPolicy.html │ ├── EpsilonGreedyPolicy-1.png │ ├── EpsilonGreedyPolicy-2.png │ ├── EpsilonGreedyPolicy.html │ ├── Exp3Policy-1.png │ ├── Exp3Policy-2.png │ ├── Exp3Policy.html │ ├── FixedPolicy.html │ ├── GittinsBrezziLaiPolicy.html │ ├── GlmUCBPolicy.html │ ├── GradientPolicy-1.png │ ├── GradientPolicy-2.png │ ├── GradientPolicy.html │ ├── History.html │ ├── LifPolicy.html │ ├── LinUCBDisjointOptimizedPolicy.html │ ├── LinUCBDisjointPolicy.html │ ├── LinUCBGeneralPolicy.html │ ├── LinUCBHybridOptimizedPolicy.html │ ├── LinUCBHybridPolicy.html │ ├── OfflineBootstrappedReplayBandit.html │ ├── OfflineDirectMethodBandit.html │ ├── OfflineDoublyRobustBandit.html │ ├── OfflineLookupReplayEvaluatorBandit-TODO-colon-Needs-to-be-documented-more-fully..html │ ├── OfflineLookupReplayEvaluatorBandit.html │ ├── OfflinePolicyEvaluatorBandit.html │ ├── OfflinePropensityWeightingBandit.html │ ├── OfflineReplayEvaluatorBandit.html │ ├── OraclePolicy.html │ ├── Plot.html │ ├── Policy.html │ ├── RandomPolicy-1.png │ ├── RandomPolicy.html │ ├── Simulator.html │ ├── SoftmaxPolicy-1.png │ ├── SoftmaxPolicy-2.png │ ├── SoftmaxPolicy.html │ ├── ThompsonSamplingPolicy-1.png │ ├── ThompsonSamplingPolicy.html │ ├── UCB1Policy-1.png │ ├── UCB1Policy-2.png │ ├── UCB1Policy.html │ ├── UCB2Policy-1.png │ ├── UCB2Policy-2.png │ ├── UCB2Policy.html │ ├── ci_boot.html │ ├── clip.html │ ├── clipr.html │ ├── data_table_factors_to_numeric.html │ ├── dec-set.html │ ├── figures │ ├── 1simulator.jpeg │ ├── 2agent.jpeg │ ├── 3abandit.jpeg │ ├── 3bpolicy.jpeg │ ├── 3cbandit.jpeg │ ├── 3dpolicy.jpeg │ ├── algoepsilonfirst.jpg │ ├── cmab_all.jpeg │ ├── cmab_all_large.jpg │ └── cmab_all_medium.jpg │ ├── formatted_difftime.html │ ├── 
get_arm_context.html │ ├── get_full_context.html │ ├── get_global_seed.html │ ├── inc-set.html │ ├── ind.html │ ├── index.html │ ├── inv.html │ ├── inv_logit.html │ ├── invgamma-1.png │ ├── invgamma.html │ ├── invlogit.html │ ├── is_rstudio.html │ ├── max_in.html │ ├── mvrnorm.html │ ├── one_hot.html │ ├── ones_in_zeroes.html │ ├── plot.history.html │ ├── print.history.html │ ├── prob_winner.html │ ├── sample_one_of.html │ ├── set_external.html │ ├── set_global_seed.html │ ├── sherman_morrisson.html │ ├── sim_post.html │ ├── sum_of.html │ ├── summary.history.html │ ├── value_remaining-1.png │ ├── value_remaining.html │ ├── var_welford.html │ ├── which_max_list.html │ └── which_max_tied.html ├── man ├── Agent.Rd ├── Bandit.Rd ├── BasicBernoulliBandit.Rd ├── BasicGaussianBandit.Rd ├── BootstrapTSPolicy.Rd ├── ContextualBernoulliBandit.Rd ├── ContextualBinaryBandit.Rd ├── ContextualEpochGreedyPolicy.Rd ├── ContextualEpsilonGreedyPolicy.Rd ├── ContextualHybridBandit.Rd ├── ContextualLinTSPolicy.Rd ├── ContextualLinearBandit.Rd ├── ContextualLogitBTSPolicy.Rd ├── ContextualLogitBandit.Rd ├── ContextualPrecachingBandit.Rd ├── ContextualTSProbitPolicy.Rd ├── ContextualWheelBandit.Rd ├── ContinuumBandit.Rd ├── EpsilonFirstPolicy.Rd ├── EpsilonGreedyPolicy.Rd ├── Exp3Policy.Rd ├── FixedPolicy.Rd ├── GittinsBrezziLaiPolicy.Rd ├── GradientPolicy.Rd ├── History.Rd ├── LifPolicy.Rd ├── LinUCBDisjointOptimizedPolicy.Rd ├── LinUCBDisjointPolicy.Rd ├── LinUCBGeneralPolicy.Rd ├── LinUCBHybridOptimizedPolicy.Rd ├── LinUCBHybridPolicy.Rd ├── OfflineBootstrappedReplayBandit.Rd ├── OfflineDirectMethodBandit.Rd ├── OfflineDoublyRobustBandit.Rd ├── OfflineLookupReplayEvaluatorBandit.Rd ├── OfflinePropensityWeightingBandit.Rd ├── OfflineReplayEvaluatorBandit.Rd ├── OraclePolicy.Rd ├── Plot.Rd ├── Policy.Rd ├── RandomPolicy.Rd ├── Simulator.Rd ├── SoftmaxPolicy.Rd ├── ThompsonSamplingPolicy.Rd ├── UCB1Policy.Rd ├── UCB2Policy.Rd ├── clipr.Rd ├── data_table_factors_to_numeric.Rd ├── dec-set.Rd ├── figures │ ├── 1simulator.jpeg │ ├── 2agent.jpeg │ ├── 3abandit.jpeg │ ├── 3bpolicy.jpeg │ ├── 3cbandit.jpeg │ ├── 3dpolicy.jpeg │ ├── algoepsilonfirst.jpg │ ├── cmab_all.jpeg │ ├── cmab_all_large.jpg │ └── cmab_all_medium.jpg ├── formatted_difftime.Rd ├── get_arm_context.Rd ├── get_full_context.Rd ├── get_global_seed.Rd ├── inc-set.Rd ├── ind.Rd ├── inv.Rd ├── invgamma.Rd ├── invlogit.Rd ├── is_rstudio.Rd ├── mvrnorm.Rd ├── one_hot.Rd ├── ones_in_zeroes.Rd ├── plot.history.Rd ├── print.history.Rd ├── prob_winner.Rd ├── sample_one_of.Rd ├── set_external.Rd ├── set_global_seed.Rd ├── sherman_morrisson.Rd ├── sim_post.Rd ├── sum_of.Rd ├── summary.history.Rd ├── value_remaining.Rd ├── var_welford.Rd ├── which_max_list.Rd └── which_max_tied.Rd ├── tests ├── figs │ ├── deps.txt │ └── plot │ │ ├── arm-plot.svg │ │ ├── arms-color.svg │ │ ├── arms-lims.svg │ │ ├── average-regret-plot.svg │ │ ├── average-reward-plot.svg │ │ ├── basic-cumulative-plot.svg │ │ ├── color-and-lty-stepping.svg │ │ ├── cumulative-sd-plot.svg │ │ ├── cumulative-traces-plot.svg │ │ ├── legend-title-and-labels-plot.svg │ │ ├── limits-plot.svg │ │ ├── lwd-pot.svg │ │ ├── only-sd-plot.svg │ │ ├── plot-inc-var-no-color.svg │ │ ├── traces-alpha-and-max-plot.svg │ │ ├── traces-plot-smooth.svg │ │ └── ylim-plot.svg ├── testthat.R └── testthat │ ├── history_context_test.ref │ ├── history_context_theta_test.ref │ ├── history_test.ref │ ├── history_theta_test.ref │ ├── setup_tests.R │ ├── teardown_tests.R │ ├── test_agent.R │ ├── test_bandits.R │ ├── test_history.R │ 
├── test_plot.R │ ├── test_policies.R │ ├── test_policy.R │ └── test_utility_functions.R └── vignettes ├── 1.png ├── Rplot.png ├── basic_epsilon_greedy.png ├── carskit_depaul.png ├── cmabs.R ├── cmabs.Rmd ├── cmabs.png ├── cmabsoffline.R ├── cmabsoffline.Rmd ├── compare.png ├── contextual-fig-1.png ├── contextual-fig-2.png ├── eckles_kaptein.R ├── eckles_kaptein.Rmd ├── eckles_kaptein_0.png ├── eckles_kaptein_1.png ├── eg_average_reward.png ├── eg_cumulative_reward.png ├── eg_incorrect.png ├── eg_optimal_action.png ├── epsilongreedy.R ├── epsilongreedy.Rmd ├── introduction.R ├── introduction.Rmd ├── linucboffline.png ├── mabs.Rmd ├── mabs.png ├── ml10m.R ├── ml10m.Rmd ├── ml10m.png ├── offline_depaul_movies.R ├── offline_depaul_movies.Rmd ├── only_pkgdown ├── faq.Rmd └── faq.html ├── replication-fig-1.png ├── replication-fig-2.png ├── replication.R ├── replication.Rmd ├── simpsons.R ├── simpsons.Rmd ├── softmax_average_reward.png ├── softmax_cumulative_reward.png ├── softmax_optimal_action.png ├── sutton_barto.R ├── sutton_barto.Rmd ├── sutton_eg_1.png ├── sutton_eg_2.png ├── sutton_gradient.png ├── sutton_optimistic.png ├── sutton_ucb.png ├── sutton_violin.png ├── ucb_average_reward.png ├── ucb_cumulative_reward.png ├── ucb_optimal_action.png ├── website_optimization.R └── website_optimization.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^doc$ 3 | ^vignettes/only_pkgdown$ 4 | ^vignettes/only_pkgdown.* 5 | ^demo/working_directory$ 6 | ^demo/working_directory.* 7 | contextual.Rproj 8 | ^.*\.Rproj$ 9 | ^\.Rproj\.user$ 10 | ^packrat/ 11 | ^\.Rprofile$ 12 | ^\.travis\.yml$ 13 | ^codecov\.yml$ 14 | ^appveyor\.yml$ 15 | ^_pkgdown\.yml$ 16 | ^progress\.txt$ 17 | ^doparallel\.log$ 18 | ^progress\.log$ 19 | ^docs$ 20 | ^vignettes/jss.* 21 | ^LICENSE\.md$ 22 | ^credentials\.json$ 23 | cran-comments.md 24 | ^revdep$ 25 | 26 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | data/* binary 4 | src/* text=lf 5 | R/* text=lf 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | packrat/lib*/ 4 | inst/doc 5 | tests/testthat/Rplots.pdf 6 | yahoodb-journal 7 | 8 | Thumbs.db 9 | .DS_Store 10 | *tmp* 11 | credentials.json 12 | **/credentials.json 13 | **/doparallel.log 14 | **/progress.log 15 | *.Rda 16 | *.Rdata 17 | *.Rds 18 | *.log 19 | *.LOG 20 | docs/reference/RandomPolicy-1.png 21 | # Created by https://www.gitignore.io/api/r 22 | ### R ### 23 | # History files 24 | .Rhistory 25 | .Rapp.history 26 | # Session Data files 27 | .RData 28 | # Example code in package build process 29 | *-Ex.R 30 | # Output files from R CMD build 31 | /*.tar.gz 32 | # Output files from R CMD check 33 | /*.Rcheck/ 34 | # RStudio files 35 | .Rproj.user/ 36 | # produced vignettes 37 | vignettes/*.html 38 | vignettes/*.pdf 39 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 40 | .httr-oauth 41 | # knitr and R markdown default cache directories 42 | *_cache/ 43 | cache/ 44 | demo/working_directory 45 | demo/working_directory* 46 | # Temporary files created by R markdown 47 | *.utf8.md 48 | *.knit.md 49 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html 50 | rsconnect/ 51 | 
### R.Bookdown Stack ### 52 | # R package: bookdown caching files 53 | *_files/ 54 | *.orig 55 | .env 56 | *.tex 57 | *.csv 58 | /revdep/.cache.rds 59 | .Rproj.user 60 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | os: 4 | - linux 5 | - osx 6 | r_packages: 7 | - covr 8 | after_success: 9 | - Rscript -e 'library(covr); codecov()' 10 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: contextual 2 | Type: Package 3 | Title: Simulation and Analysis of Contextual Multi-Armed Bandit Policies 4 | Version: 0.9.8.4 5 | Authors@R: c(person("Robin", "van Emden", role = c("aut", "cre"), 6 | comment = c(ORCID = "0000-0001-5820-8638"), email = "robinvanemden@gmail.com"), 7 | person("Maurits", "Kaptein", role = "ctb", email = "m.c.kaptein@tilburguniversity.edu", 8 | comment = c(ORCID = "0000-0002-6316-7524"))) 9 | Maintainer: Robin van Emden 10 | Description: Facilitates the simulation and evaluation of context-free 11 | and contextual multi-Armed Bandit policies or algorithms to ease the 12 | implementation, evaluation, and dissemination of both existing and 13 | new bandit algorithms and policies. 14 | License: GPL-3 15 | Encoding: UTF-8 16 | LazyData: true 17 | RoxygenNote: 7.1.1 18 | Depends: 19 | R (>= 3.5.0) 20 | Imports: 21 | R6 (>= 2.3.0), 22 | data.table, 23 | R.devices, 24 | foreach, 25 | doParallel, 26 | itertools, 27 | iterators, 28 | Formula, 29 | rjson 30 | Suggests: 31 | testthat, 32 | RCurl, 33 | splitstackshape, 34 | covr, 35 | knitr, 36 | here, 37 | rmarkdown, 38 | devtools, 39 | ggplot2, 40 | vdiffr 41 | VignetteBuilder: knitr 42 | URL: https://github.com/Nth-iteration-labs/contextual 43 | BugReports: https://github.com/Nth-iteration-labs/contextual/issues 44 | Roxygen: list(markdown = TRUE) 45 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | contextual 0.9.8.4 2 | ================== 3 | 4 | * Minor documentation updates. 5 | * Fix for Exp3 bug (thanks, @leferrad) 6 | * Cleanup of propensity score related code (thanks again, @leferrad) 7 | * Updated tests. 8 | 9 | 10 | contextual 0.9.8.3 11 | ================== 12 | 13 | * Tested and confirmed to be R 4.0.0 proof. 14 | * Minor documentation updates. 15 | * Now correctly restores global seed on completing a simulation (thanks, @pstansell) 16 | 17 | 18 | contextual 0.9.8.2 19 | ================== 20 | 21 | * Minor documentation update 22 | * Minor refactoring: Private utility functions moved from the History to the Plot class. 
23 | 24 | contextual 0.9.8.1 25 | ================== 26 | 27 | * Specified previous version of set.seed sampler with RNGversion() calls 28 | 29 | contextual 0.9.8 30 | ================ 31 | 32 | * Major update 33 | * API change for offline Bandits 34 | * Fixes inverse propensity score weighting 35 | * Documentation updates 36 | * Additional demo scripts 37 | 38 | contextual 0.9.1 39 | ================ 40 | 41 | * First CRAN release 42 | 43 | contextual 0.9.0 44 | ================ 45 | 46 | * CRAN Submission 47 | -------------------------------------------------------------------------------- /R/policy_cmab_lin_epoch_greedy.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | ContextualEpochGreedyPolicy <- R6::R6Class( 3 | portable = FALSE, 4 | class = FALSE, 5 | inherit = Policy, 6 | public = list( 7 | sZl = NULL, 8 | exploration_phase = NULL, 9 | class_name = "ContextualEpochGreedyPolicy", 10 | initialize = function(sZl = 10) { 11 | super$initialize() 12 | self$sZl <- sZl 13 | }, 14 | set_parameters = function(context_params) { 15 | d <- context_params$d 16 | self$theta_to_arms <- list('A' = diag(1,d,d), 'b' = rep(0,d)) 17 | }, 18 | get_action = function(t, context) { 19 | 20 | if(t==1 || t%%self$sZl==0) self$exploration_phase = TRUE 21 | 22 | if (!isTRUE(self$exploration_phase)) { 23 | expected_rewards <- rep(0.0, context$k) 24 | for (arm in 1:context$k) { 25 | Xa <- get_arm_context(context, arm) 26 | A <- self$theta$A[[arm]] 27 | b <- self$theta$b[[arm]] 28 | A_inv <- inv(A) 29 | theta_hat <- A_inv %*% b 30 | expected_rewards[arm] <- Xa %*% theta_hat 31 | } 32 | action$choice <- which_max_tied(expected_rewards) 33 | 34 | } else { 35 | self$action$choice <- sample.int(context$k, 1, replace = TRUE) 36 | } 37 | action 38 | }, 39 | set_reward = function(t, context, action, reward) { 40 | arm <- action$choice 41 | reward <- reward$reward 42 | Xa <- get_arm_context(context, arm) 43 | 44 | if (isTRUE(self$exploration_phase)) { 45 | inc(self$theta$A[[arm]]) <- outer(Xa, Xa) 46 | inc(self$theta$b[[arm]]) <- reward * Xa 47 | self$exploration_phase <- FALSE 48 | } 49 | 50 | self$theta 51 | } 52 | ) 53 | ) 54 | #' Policy: A Time and Space Efficient Algorithm for Contextual Linear Bandits 55 | #' 56 | #' @name ContextualEpochGreedyPolicy 57 | #' 58 | #' 59 | #' @section Usage: 60 | #' \preformatted{ 61 | #' policy <- ContextualEpochGreedyPolicy$new(sZl = 10) 62 | #' } 63 | #' 64 | #' @seealso 65 | #' 66 | #' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 67 | #' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 68 | #' 69 | #' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 70 | #' \code{\link{OfflineReplayEvaluatorBandit}} 71 | #' 72 | #' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 73 | NULL 74 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | environment: 14 | R_VERSION: stable 15 | PKGTYPE: 
binary 16 | USE_RTOOLS: true 17 | R_REMOTES_STANDALONE: true 18 | 19 | cache: 20 | - C:\RLibrary 21 | 22 | # Adapt as necessary starting from here 23 | 24 | build_script: 25 | - travis-tool.sh install_deps 26 | 27 | test_script: 28 | - travis-tool.sh run_tests 29 | 30 | on_failure: 31 | - 7z a failure.zip *.Rcheck\* 32 | - appveyor PushArtifact failure.zip 33 | 34 | artifacts: 35 | - path: '*.Rcheck\**\*.log' 36 | name: Logs 37 | 38 | - path: '*.Rcheck\**\*.out' 39 | name: Logs 40 | 41 | - path: '*.Rcheck\**\*.fail' 42 | name: Logs 43 | 44 | - path: '*.Rcheck\**\*.Rout' 45 | name: Logs 46 | 47 | - path: '\*_*.tar.gz' 48 | name: Bits 49 | 50 | - path: '\*_*.zip' 51 | name: Bits 52 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | patch: 10 | default: 11 | target: auto 12 | threshold: 1% 13 | 14 | language: R 15 | sudo: false 16 | cache: packages 17 | after_success: 18 | - Rscript -e 'covr::codecov()' 19 | -------------------------------------------------------------------------------- /contextual.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: No 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageCheckArgs: --as-cran 22 | PackageRoxygenize: rd,collate,namespace,vignette 23 | 24 | QuitChildProcessesOnExit: Yes 25 | DisableExecuteRprofile: Yes 26 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | * Minor documentation updates. 2 | * Fix for Exp3 bug (thanks, @leferrad) 3 | * Cleanup of propensity score related code (thanks again, @leferrad) 4 | * Updated tests. 5 | 6 | ## Test environments 7 | 8 | * local Windows 10 (x64) release, R 4.0.2 9 | * OS X install (on travis-ci) R-release 10 | * Ubuntu 12.04 (on travis-ci) R-release 11 | * Windows Server 2012 R2 x64 install (on appveyor), R 4.0.2 12 | * Rhub: 13 | * Fedora Linux, R-devel, clang, gfortran 14 | * Ubuntu Linux 16.04 LTS, R-release, GCC 15 | * Windows Server 2008 R2 SP1, R-devel, 32/64 bit 16 | * win-builder (devel, oldrelease and release) 17 | 18 | ## R CMD check results 19 | 20 | ### Generally no errors, no warnings, no notes 21 | 22 | ``` 23 | 0 ERRORs | 0 WARNINGs | 0 NOTES. 24 | ``` 25 | 26 | ### Oldrelease and Ubuntu Linux 16.04: 1 NOTE 27 | 28 | ``` 29 | Author field differs from that derived from Authors@R 30 | Author: 'Robin van Emden [aut, cre] (<https://orcid.org/0000-0001-5820-8638>), Maurits Kaptein [ctb] (<https://orcid.org/0000-0002-6316-7524>)' 31 | Authors@R: 'Robin van Emden [aut, cre] (0000-0001-5820-8638), Maurits Kaptein [ctb] (0000-0002-6316-7524)' 32 | ``` 33 | The only way to get rid of this is by removing the ORCID from the Authors@R comment field - which is processed correctly in R versions later than oldrelease/Ubuntu 16.04. Presume this can safely be ignored. 
34 | 35 | ## Downstream dependencies 36 | 37 | No ERRORs or WARNINGs found 38 | -------------------------------------------------------------------------------- /demo/00Index: -------------------------------------------------------------------------------- 1 | demo_subsubclass Further subclassing of existing policies and bandits. 2 | demo_sine_bandit Bandit reward function fluctuating over time. 3 | demo_offline_cmab_alpha_linucb_direct_method Offline bandit and parameter evaluation - direct method. 4 | demo_offline_cmab_alpha_linucb_replay Offline bandit and parameter evaluation - replay. 5 | demo_mab_policy_comparison Comparison of several context-free bandit policies. 6 | demo_epsilon_greedy_policy Basic simulation of a context-free policy. 7 | demo_lif_bandit Use of a continuum bandit with the LiF policy. 8 | demo_cmab_policy_comparison_linear_bandit Comparison of contextual policies on a linear bandit. 9 | demo_cmab_policy_comparison_weight_bandit Comparison of contextual policies on a weight-based bandit. 10 | demo_simpsons_paradox_propensity Simpson's Paradox to demonstrate propensity weighting. 11 | demo_sutton_barto Contextual code reproducing Sutton & Barto (2018) plots. 12 | demo_bandit_algorithms_for_website_optimization Contextual code reproducing John Myles White (2012) plots. 13 | demo_epsilon_greedy_to_epoch_greedy_policy From contextual epsilon-greedy to epoch-greedy policies. 14 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/azure/cluster.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "contextual_test_cluster_1", 3 | "vmSize": "Standard_F2", 4 | "maxTasksPerNode": 2, 5 | "poolSize": { 6 | "dedicatedNodes": { 7 | "min": 2, 8 | "max": 2 9 | }, 10 | "lowPriorityNodes": { 11 | "min": 0, 12 | "max": 0 13 | }, 14 | "autoscaleFormula": "QUEUE" 15 | }, 16 | "rPackages": { 17 | "cran": ["foreach", "data.table", "itertools"], 18 | "github": ["Nth-iteration-labs/contextual"], 19 | "githubAuthenticationToken": "" 20 | }, 21 | "commandLine": [] 22 | } 23 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/azure/credentials-sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "batchAccount": { 3 | "name": "batch_account_name_here", 4 | "key": "batch_account_key_here", 5 | "url": "https://batch_account_name_here.area_here.batch.azure.com" 6 | }, 7 | "storageAccount": { 8 | "name": "storage_account_name_here", 9 | "key": "storage_account_key_here" 10 | }, 11 | "githubAuthenticationToken": "githubAuthenticationToken_here" 12 | } 13 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/azure/demo_azure.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(here) 3 | 4 | setwd(here::here("demo","alt_par_backend_examples","azure")) 5 | 6 | source("simulator_azure.R") 7 | 8 | #devtools::install_github("Azure/rAzureBatch") 9 | #devtools::install_github("Azure/doAzureParallel") 10 | 11 | ## follow setup and install of doAzureParallel 12 | ## at https://github.com/Azure/doAzureParallel 13 | 14 | ## sample credentials in the same directory as this file 15 | ## add your credentials and save to the current directory 16 | 17 | horizon <- 1000L 18 | simulations <- 4L 19 | 20 | bandit <- ContextualLinearBandit$new(k = 5, d = 5) 21 | 22 | agents 
<-list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit), 23 | Agent$new(ContextualLogitBTSPolicy$new(10), bandit), 24 | Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit)) 25 | 26 | simulation <- AzureSimulator$new(agents, horizon, simulations) 27 | 28 | history <- simulation$run() 29 | 30 | plot(history, type = "cumulative", rate = FALSE, legend_position = "topleft") 31 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/azure/simulator_azure.R: -------------------------------------------------------------------------------- 1 | # AzureSimulator is a subclass of Simulator 2 | # substituting doParallel with doAzureParallel. 3 | 4 | # devtools::install_github("Azure/rAzureBatch") 5 | # devtools::install_github("Azure/doAzureParallel") 6 | 7 | library(contextual) 8 | library(foreach) 9 | library(doAzureParallel) 10 | library(here) 11 | 12 | setwd(here::here("demo","alt_par_backend_examples","azure")) 13 | 14 | AzureSimulator <- R6::R6Class( 15 | inherit = Simulator, 16 | public = list( 17 | register_parallel_backend = function() { 18 | 19 | # 1. Generate your credential and cluster configuration files. 20 | doAzureParallel::generateClusterConfig("cluster.json") 21 | doAzureParallel::generateCredentialsConfig("credentials.json") 22 | 23 | # 2. Fill out your credential config and cluster config files. 24 | 25 | # 3. Set your credentials - you need to give the R session your credentials to interact with Azure 26 | doAzureParallel::setCredentials("credentials.json") 27 | 28 | # 4. Register the pool. This will create a new pool if your pool hasn't already been provisioned. 29 | super$cl <- doAzureParallel::makeCluster("cluster.json") 30 | 31 | # 5. Register the pool as your parallel backend 32 | doAzureParallel::registerDoAzureParallel(super$cl) 33 | 34 | # 6. Check that your parallel backend has been registered 35 | super$workers = foreach::getDoParWorkers() 36 | 37 | message(paste0("Azure workers: ", super$workers)) 38 | }, 39 | stop_parallel_backend = function() { 40 | try({ 41 | doAzureParallel::stopCluster(super$cl) 42 | }) 43 | } 44 | ) 45 | ) 46 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/azure/test_azure_connection.R: -------------------------------------------------------------------------------- 1 | library(foreach) 2 | library(doAzureParallel) 3 | library(here) 4 | 5 | setwd(here::here("demo","alternative_parallel_backends","azure")) 6 | 7 | # 1. Generate your credential and cluster configuration files. 8 | doAzureParallel::generateClusterConfig("cluster.json") 9 | doAzureParallel::generateCredentialsConfig("credentials.json") 10 | 11 | # 2. Fill out your credential config and cluster config files. 12 | # Enter your Azure Batch Account & Azure Storage keys/account-info into your 13 | # credential config ("credentials.json") and configure your cluster in your 14 | # cluster config ("cluster.json") 15 | 16 | # 3. Set your credentials - you need to give the R session your credentials to 17 | # interact with Azure 18 | doAzureParallel::setCredentials("credentials.json") 19 | 20 | # 4. Register the pool. This will create a new pool if your pool hasn't already 21 | # been provisioned. 22 | cl <- doAzureParallel::makeCluster("cluster.json") 23 | 24 | # 5. Register the pool as your parallel backend 25 | doAzureParallel::registerDoAzureParallel(cl) 26 | 27 | # 6. 
Check that your parallel backend has been registered 28 | workers = foreach::getDoParWorkers() 29 | message(paste0("Workers: ",workers)) 30 | 31 | clusters <- doAzureParallel::getClusterList() 32 | print(clusters) 33 | 34 | doAzureParallel::stopCluster(cl) 35 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/redis/demo_redis.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(here) 3 | 4 | setwd(here::here("demo","alt_par_backend_examples","redis")) 5 | 6 | source("simulator_redis.R") 7 | 8 | library(contextual) 9 | 10 | horizon <- 1000L 11 | simulations <- 4L 12 | 13 | bandit <- ContextualLinearBandit$new(k = 5, d = 5) 14 | 15 | agents <-list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit), 16 | Agent$new(ContextualLogitBTSPolicy$new(10), bandit), 17 | Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit)) 18 | 19 | simulation <- RedisSimulator$new(agents, horizon, simulations) 20 | 21 | history <- simulation$run() 22 | 23 | plot(history, type = "cumulative", rate = FALSE, legend_position = "topleft") 24 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/redis/simulator_redis.R: -------------------------------------------------------------------------------- 1 | # RedisSimulator is a subclass of Simulator 2 | # substituting doParallel with doRedis. 3 | 4 | # Before running the example, follow instructions at: 5 | # 6 | # https://github.com/bwlewis/doRedis 7 | # 8 | # Then open one or more R sessions that will act as back-end worker processes. 9 | # Run the following in each session: 10 | # 11 | # require('doRedis') 12 | # redisWorker('jobs') 13 | 14 | library(contextual) 15 | library(foreach) 16 | library(doRedis) 17 | 18 | RedisSimulator <- R6::R6Class( 19 | inherit = Simulator, 20 | public = list( 21 | register_parallel_backend = function() { 22 | options('redis:num'=TRUE) 23 | doRedis::registerDoRedis('jobs') 24 | super$workers = foreach::getDoParWorkers() 25 | }, 26 | stop_parallel_backend = function() { 27 | try({ 28 | doRedis::removeQueue('jobs') 29 | }) 30 | } 31 | ) 32 | ) 33 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/rmpi/demo_rmpi.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(here) 3 | 4 | setwd(here::here("demo","alt_par_backend_examples","rmpi")) 5 | 6 | source("simulator_rmpi.R") 7 | 8 | library(contextual) 9 | 10 | horizon <- 1000L 11 | simulations <- 4L 12 | 13 | bandit <- ContextualLinearBandit$new(k = 5, d = 5) 14 | 15 | agents <-list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit), 16 | Agent$new(ContextualLogitBTSPolicy$new(10), bandit), 17 | Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit)) 18 | 19 | simulation <- MPISimulator$new(agents, horizon, simulations) 20 | 21 | history <- simulation$run() 22 | 23 | plot(history, type = "cumulative", rate = FALSE, legend_position = "topleft") 24 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/rmpi/simulator_rmpi.R: -------------------------------------------------------------------------------- 1 | # MPISimulator is a subclass of Simulator 2 | # substituting doParallel with doMPI. 
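# Assumed usage note (not part of the original script): doMPI::startMPIcluster()
# spawns its own workers when the file is run interactively, or attaches to MPI
# processes started externally, for example via something like
#   mpirun -np 5 Rscript demo_rmpi.R
# in which case one process runs the script and the remaining processes act as
# foreach workers.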
3 | 4 | library(contextual) 5 | library(foreach) 6 | library(Rmpi) 7 | library(doMPI) 8 | 9 | MPISimulator <- R6::R6Class( 10 | inherit = Simulator, 11 | public = list( 12 | register_parallel_backend = function() { 13 | super$cl <- doMPI::startMPIcluster() 14 | doMPI::registerDoMPI(super$cl) 15 | super$workers = foreach::getDoParWorkers() 16 | message(paste0("MPI workers: ", super$workers)) 17 | }, 18 | stop_parallel_backend = function() { 19 | try({ 20 | doMPI::closeCluster(super$cl) 21 | }) 22 | } 23 | ) 24 | ) 25 | -------------------------------------------------------------------------------- /demo/alternative_parallel_backends/rmpi/test_rmpi_connection.R: -------------------------------------------------------------------------------- 1 | library(foreach) 2 | library(Rmpi) 3 | 4 | # Instructions for installing Rmpi: http://fisher.stats.uwo.ca/faculty/yu/Rmpi/ 5 | 6 | mpi.spawn.Rslaves() 7 | Sys.sleep(3) 8 | 9 | mpi.setup.rngstream(iseed=123) 10 | mpi.parReplicate(80, mean(rnorm(1000000))) 11 | 12 | mpi.close.Rslaves() 13 | -------------------------------------------------------------------------------- /demo/demo_cmab_policy_comparison_linear_bandit.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | horizon <- 400L 4 | simulations <- 300L 5 | 6 | bandit <- ContextualLinearBandit$new(k = 5, d = 5, sigma = 0.1) 7 | 8 | agents <- list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit, "EGreedy"), 9 | Agent$new(ContextualEpsilonGreedyPolicy$new(0.1), bandit, "cEGreedy"), 10 | Agent$new(ContextualLinTSPolicy$new(0.01), bandit, "LinTS"), 11 | Agent$new(LinUCBDisjointOptimizedPolicy$new(1), bandit, "LinUCB")) 12 | 13 | simulation <- Simulator$new(agents, horizon, simulations) 14 | 15 | history <- simulation$run() 16 | 17 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "topleft") 18 | -------------------------------------------------------------------------------- /demo/demo_cmab_policy_comparison_weight_bandit.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | horizon <- 1000L 4 | simulations <- 100L 5 | 6 | weights <- matrix(c(0.8, 0.1, 0.1, 7 | 0.1, 0.8, 0.1, 8 | 0.1, 0.1, 0.8), nrow = 3, ncol = 3, byrow = TRUE) 9 | 10 | bandit <- ContextualBinaryBandit$new(weights = weights) 11 | agents <- list(Agent$new(ContextualTSProbitPolicy$new(draws = 100), bandit, "TSProbit"), 12 | Agent$new(ContextualEpsilonGreedyPolicy$new(0.1), bandit, "cEGreedy"), 13 | Agent$new(ContextualLogitBTSPolicy$new(draws = 100), bandit, "LogitBTS"), 14 | Agent$new(LinUCBDisjointPolicy$new(0.6), bandit, "LinUCB")) 15 | 16 | simulation <- Simulator$new(agents, horizon, simulations) 17 | history <- simulation$run() 18 | 19 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE, legend_position = "topleft") 20 | -------------------------------------------------------------------------------- /demo/demo_epsilon_greedy_policy.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | policy <- EpsilonGreedyPolicy$new(epsilon = 0.1) 4 | 5 | bandit <- BasicBernoulliBandit$new(weights = c(0.6, 0.1, 0.1)) 6 | 7 | agent <- Agent$new(policy,bandit) 8 | 9 | simulator <- Simulator$new(agents = agent, 10 | horizon = 100, 11 | simulations = 1000) 12 | simulator$run() 13 | 14 | plot(simulator$history, type = "cumulative", regret = TRUE, disp = "ci", 15 | traces = TRUE, traces_max = 100, traces_alpha 
= 0.1) 16 | 17 | summary(simulator$history) 18 | 19 | sim_data <- simulator$history$get_data_table() 20 | sim_cum_data <- simulator$history$get_cumulative_data() 21 | -------------------------------------------------------------------------------- /demo/demo_lif_bandit.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | horizon <- 1000 4 | simulations <- 100 5 | 6 | continuous_arms <- function(x) { 7 | -0.1*(x - 5) ^ 2 + 3.5 + rnorm(length(x),0,0.4) 8 | } 9 | 10 | int_time <- 100 11 | amplitude <- 0.2 12 | learn_rate <- 0.3 13 | omega <- 2*pi/int_time 14 | x0_start <- 2.0 15 | 16 | policy <- LifPolicy$new(int_time, amplitude, learn_rate, omega, x0_start) 17 | 18 | bandit <- ContinuumBandit$new(FUN = continuous_arms) 19 | 20 | agent <- Agent$new(policy,bandit) 21 | 22 | history <- Simulator$new(agents = agent, 23 | horizon = horizon, 24 | simulations = simulations)$run() 25 | 26 | plot(history, type = "average", regret = FALSE) 27 | -------------------------------------------------------------------------------- /demo/demo_mab_policy_comparison.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | prob_per_arm <- c(0.5, 0.3, 0.1) 4 | horizon <- 150 5 | simulations <- 2000 6 | 7 | bandit <- BasicBernoulliBandit$new(prob_per_arm) 8 | 9 | agents <- list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit), 10 | Agent$new(ThompsonSamplingPolicy$new(1, 1), bandit), 11 | Agent$new(Exp3Policy$new(0.1), bandit), 12 | Agent$new(GittinsBrezziLaiPolicy$new(), bandit), 13 | Agent$new(UCB1Policy$new(), bandit), 14 | Agent$new(UCB2Policy$new(0.1), bandit)) 15 | 16 | simulation <- Simulator$new(agents, horizon, simulations) 17 | history <- simulation$run() 18 | 19 | plot(history, type = "cumulative") 20 | 21 | summary(history) 22 | -------------------------------------------------------------------------------- /demo/demo_offline_cmab_alpha_linucb_direct_method.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(data.table) 3 | library(Formula) 4 | 5 | # Import personalization data-set 6 | 7 | data <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/data.txt") 8 | # 0/1 reward, 10 arms, 100 features 9 | # arms always start from 1 10 | 11 | # z y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 .. x100 12 | # 1: 2 0 5 0 0 37 6 0 0 0 0 25 0 0 7 1 0 .. 0 13 | # 2: 8 0 1 3 36 0 0 0 0 0 0 0 0 1 0 0 0 .. 10 14 | # 3: . . . . . . . . . . . . . . . . . .. . 15 | 16 | simulations <- 1 17 | horizon <- nrow(data) 18 | 19 | # Run regression per arm, predict outcomes, and save results, a column per arm 20 | 21 | x <- reformulate(names(data)[3:102],response="y") # x: x1 .. 
x100 22 | f <- Formula::as.Formula(x) # y ~ x 23 | 24 | model_f <- function(arm) glm(f, data=data[z==arm], family=binomial(link="logit"), y=F, model=F) 25 | arms <- sort(unique(data$z)) 26 | model_arms <- lapply(arms, FUN = model_f) 27 | 28 | predict_arm <- function(model) predict(model, data, type = "response") 29 | r_data <- lapply(model_arms, FUN = predict_arm) 30 | r_data <- do.call(cbind, r_data) 31 | colnames(r_data) <- paste0("r", (1:max(arms))) 32 | 33 | # Bind data and model predictions 34 | 35 | data <- cbind(data,r_data) 36 | 37 | # Run direct method style offline bandit 38 | 39 | x <- reformulate(names(data)[3:102], response="y") 40 | z <- ~ z 41 | r <- ~ r1 + r2 + r3 + r4 + r5 + r6 + r7 + r8 + r9 + r10 42 | 43 | f <- as.Formula(z,x,r) # Resulting in: y ~ z | x1 + x2 .. | r1 + r2 + .. 44 | 45 | bandit <- OfflineDirectMethodBandit$new(formula = f, data = data) 46 | 47 | # Define agents. 48 | agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.01), bandit, "alpha = 0.01"), 49 | Agent$new(LinUCBDisjointOptimizedPolicy$new(0.05), bandit, "alpha = 0.05"), 50 | Agent$new(LinUCBDisjointOptimizedPolicy$new(0.1), bandit, "alpha = 0.1"), 51 | Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit, "alpha = 1.0")) 52 | 53 | # Initialize the simulation. 54 | 55 | simulation <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon) 56 | 57 | # Run the simulation. 58 | sim <- simulation$run() 59 | 60 | # plot the results 61 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright", ylim = c(0,1)) 62 | -------------------------------------------------------------------------------- /demo/demo_offline_cmab_alpha_linucb_replay.R: -------------------------------------------------------------------------------- 1 | library(contextual); library(data.table) 2 | 3 | dt <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/data.txt") 4 | # 0/1 reward, 10 arms, 100 features 5 | # arms always start from 1 6 | 7 | # z y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 .. x100 8 | # 1: 2 0 5 0 0 37 6 0 0 0 0 25 0 0 7 1 0 .. 0 9 | # 2: 8 0 1 3 36 0 0 0 0 0 0 0 0 1 0 0 0 .. 10 10 | # 3: . . . . . . . . . . . . . . . . . .. . 11 | 12 | # Set up formula: y ~ z | x1 + x2 + .. 13 | # In bandit parlance: reward ~ arms | covariates or contextual features 14 | 15 | f <- y ~ z | . 
- z 16 | 17 | # Instantiate Replay Bandit (Li, 2010) 18 | bandit <- OfflineReplayEvaluatorBandit$new(formula = f, data = dt) 19 | 20 | # Bind Policies withs Bandits through Agents, add Agents to list 21 | agents <- list( 22 | Agent$new(UCB2Policy$new(0.01), bandit, "UCB2 alpha = 0.01"), 23 | Agent$new(LinUCBDisjointPolicy$new(0.01), bandit, "LinUCB alpha = 0.01"), 24 | Agent$new(LinUCBDisjointPolicy$new(0.1), bandit, "LinUCB alpha = 0.1")) 25 | 26 | # Instantiate and run a Simulator, plot the resulting History object 27 | history <- Simulator$new(agents, horizon = nrow(dt), simulations = 5)$run() 28 | plot(history, type = "cumulative", regret = FALSE, legend_border = FALSE) 29 | -------------------------------------------------------------------------------- /demo/demo_sine_bandit.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | # Based on a section of Dai Shi's thesis 4 | # "Exploring Bandit Algorithms for Automatic Content Selection" 5 | 6 | horizon <- 600L 7 | simulations <- 300L 8 | 9 | TwoArmedSineBandit <- R6::R6Class( 10 | inherit = Bandit, 11 | class = FALSE, 12 | public = list( 13 | sigma = NULL, 14 | class_name = "TwoArmedSineBandit", 15 | initialize = function(k = 2, sigma = 0.2) { 16 | self$k = k 17 | self$sigma = sigma 18 | }, 19 | get_context = function(t) { 20 | context <- list(k = self$k) 21 | }, 22 | get_reward = function(t, context, action) { 23 | rseq <- seq(0,2, by = 2/self$k)[-(self$k+1)] 24 | sine <- sapply(rseq,self$sine,t) 25 | reward <- sine + rnorm(1, sd = self$sigma) 26 | reward <- list( 27 | reward = reward[action$choice], 28 | optimal_reward = sine[which_max_tied(sine)], 29 | optimal_arm = contextual::which_max_tied(sine) 30 | ) 31 | }, 32 | sine = function(phi, t) { 33 | omega <- 0.125; A <- 0.5; p <- 1.0; 34 | A * (sin(omega * pi * t /10 + phi * pi) + p) 35 | } 36 | ) 37 | ) 38 | 39 | bandit <- TwoArmedSineBandit$new() 40 | 41 | agents <- list(Agent$new(Exp3Policy$new(0.1), bandit), 42 | Agent$new(UCB1Policy$new(), bandit)) 43 | 44 | simulation <- Simulator$new(agents, horizon = horizon, simulations = simulations, do_parallel = TRUE) 45 | 46 | history <- simulation$run() 47 | 48 | plot(history, type = "average", regret = FALSE, disp = "var", plot_only_disp = TRUE) 49 | plot(history, type = "average", regret = TRUE, disp = "var", plot_only_disp = TRUE) 50 | plot(history, type = "cumulative", disp = "var", rate = TRUE, plot_only_disp = TRUE) 51 | plot(history, type = "average", regret = FALSE, disp = "var") 52 | plot(history, type = "average", regret = TRUE, disp = "var") 53 | plot(history, type = "cumulative", disp = "var", rate = TRUE) 54 | -------------------------------------------------------------------------------- /demo/demo_subsubclass.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | BasicPoissonBandit <- R6::R6Class( 4 | inherit = BasicBernoulliBandit, 5 | class = FALSE, 6 | public = list( 7 | weights = NULL, 8 | class_name = "BasicPoissonBandit", 9 | # Override get_reward & generate Poisson based rewards 10 | get_reward = function(t, context, action) { 11 | reward_means = rep(2,self$k) 12 | rpm <- rpois(self$k, reward_means) 13 | rewards <- matrix(rpm < self$weights, self$k, 1)*1 14 | optimal_arm <- which_max_tied(self$weights) 15 | reward <- list( 16 | reward = rewards[action$choice], 17 | optimal_arm = optimal_arm, 18 | optimal_reward = rewards[optimal_arm] 19 | ) 20 | } 21 | ) 22 | ) 23 | 24 | 
EpsilonGreedyAnnealingPolicy <- R6::R6Class( 25 | # Class extends EpsilonGreedyPolicy 26 | inherit = EpsilonGreedyPolicy, 27 | portable = FALSE, 28 | public = list( 29 | class_name = "EpsilonGreedyAnnealingPolicy", 30 | # Override EpsilonGreedyPolicy's get_action, use annealing epsilon 31 | get_action = function(t, context) { 32 | self$epsilon <- 1/(log(100*t+0.001)) 33 | super$get_action(t, context) 34 | } 35 | ) 36 | ) 37 | 38 | weights <- c(7,1,2) 39 | horizon <- 200 40 | simulations <- 1000 41 | bandit <- BasicPoissonBandit$new(weights) 42 | ega_policy <- EpsilonGreedyAnnealingPolicy$new() 43 | eg_policy <- EpsilonGreedyPolicy$new(0.2) 44 | agents <- list(Agent$new(ega_policy, bandit, "EG Annealing"), 45 | Agent$new(eg_policy, bandit, "EG")) 46 | simulation <- Simulator$new(agents, horizon, simulations, do_parallel = TRUE) 47 | history <- simulation$run() 48 | 49 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE, 50 | legend_position = "bottomright") 51 | -------------------------------------------------------------------------------- /demo/evaluations_on_public_datasets/demo_carskit_depaul.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(data.table) 3 | 4 | # Import the CARSKit DePaul movie data-set 5 | 6 | # Info: https://d1ie9wlkzugsxr.cloudfront.net/data_irecsys_CARSKit/Movie_DePaulMovie/README.txt 7 | 8 | url <- "http://d1ie9wlkzugsxr.cloudfront.net/data_irecsys_CARSKit/Movie_DePaulMovie/ratings.csv" 9 | data <- fread(url, stringsAsFactors=TRUE) 10 | 11 | # Convert data 12 | 13 | data <- contextual::one_hot(data, cols = c("Time","Location","Companion"), sparsifyNAs = TRUE) 14 | data[, itemid := as.numeric(itemid)] 15 | data[, rating := ifelse(rating <= 3, 0, 1)] 16 | 17 | # Set simulation parameters. 18 | simulations <- 10 # here, "simulations" represents the number of bootstrap samples 19 | horizon <- nrow(data) 20 | 21 | # Initiate the bootstrapped replay bandit (movie items as arms, one-hot context features) 22 | log_S <- data 23 | formula <- formula("rating ~ itemid | Time_Weekday + Time_Weekend + Location_Cinema + Location_Home + 24 | Companion_Alone + Companion_Family + Companion_Partner") 25 | bandit <- OfflineBootstrappedReplayBandit$new(formula = formula, data = data) 26 | 27 | # Define agents. 28 | agents <- 29 | list(Agent$new(RandomPolicy$new(), bandit, "Random"), 30 | Agent$new(EpsilonGreedyPolicy$new(0.03), bandit, "EGreedy 0.03"), 31 | Agent$new(ThompsonSamplingPolicy$new(), bandit, "ThompsonSampling"), 32 | Agent$new(LinUCBDisjointOptimizedPolicy$new(0.37), bandit, "LinUCB 0.37")) 33 | 34 | # Initialize the simulation. 35 | simulation <- 36 | Simulator$new( 37 | agents = agents, 38 | simulations = simulations, 39 | horizon = horizon 40 | ) 41 | 42 | # Run the simulation. 43 | # Takes about 5 minutes: the bootstrapped replay bandit loops over arms x horizon x simulations (times the number of agents). 
44 | sim <- simulation$run() 45 | 46 | # plot the results 47 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, 48 | legend_position = "topleft", ylim=c(0.48,0.87)) 49 | -------------------------------------------------------------------------------- /demo/offline_bandit_evaluations/demo_offline_bootstrap_replay.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(data.table) 3 | 4 | # Import myocardial infection dataset 5 | 6 | url <- "http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv" 7 | data <- fread(url) 8 | 9 | simulations <- 1000 10 | horizon <- nrow(data) 11 | 12 | # arms always start at 1 13 | data$trt <- data$trt + 1 14 | 15 | # turn death into alive, making it a reward 16 | data$alive <- abs(data$death - 1) 17 | 18 | # run bandit - when leaving out p, Propensity Bandit uses marginal prob per arm for propensities: 19 | # table(private$z)/length(private$z) 20 | 21 | f <- alive ~ trt | age + risk + severity 22 | 23 | bandit <- OfflineBootstrappedReplayBandit$new(formula = f, data = data) 24 | 25 | # Define agents. 26 | agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB")) 27 | 28 | # Initialize the simulation. 29 | 30 | simulation <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon) 31 | 32 | # Run the simulation. 33 | sim <- simulation$run() 34 | 35 | # plot the results 36 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright") 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /demo/offline_bandit_evaluations/demo_offline_direct_method.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(data.table) 3 | 4 | # Import myocardial infection dataset 5 | 6 | url <- "http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv" 7 | data <- fread(url) 8 | 9 | simulations <- 1000 10 | horizon <- nrow(data) 11 | 12 | # arms always start at 1 13 | data$trt <- data$trt + 1 14 | 15 | # turn death into alive, making it a reward 16 | data$alive <- abs(data$death - 1) 17 | 18 | # Run regression per arm, predict outcomes, and save results, a column per arm 19 | 20 | f <- alive ~ age + risk + severity 21 | 22 | model_f <- function(arm) glm(f, data=data[trt==arm], family=binomial(link="logit"), y=F, model=F) 23 | arms <- sort(unique(data$trt)) 24 | model_arms <- lapply(arms, FUN = model_f) 25 | 26 | predict_arm <- function(model) predict(model, data, type = "response") 27 | r_data <- lapply(model_arms, FUN = predict_arm) 28 | r_data <- do.call(cbind, r_data) 29 | colnames(r_data) <- paste0("R", (1:max(arms))) 30 | 31 | # Bind data and model predictions 32 | 33 | data <- cbind(data,r_data) 34 | 35 | # Define Bandit 36 | 37 | f <- alive ~ trt | age + risk + severity | R1 + R2 38 | 39 | bandit <- OfflineDirectMethodBandit$new(formula = f, data = data) 40 | 41 | # Define agents. 42 | agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB")) 43 | 44 | # Initialize the simulation. 45 | 46 | simulation <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon) 47 | 48 | # Run the simulation. 
49 | sim <- simulation$run() 50 | 51 | # plot the results 52 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright") 53 | 54 | -------------------------------------------------------------------------------- /demo/offline_bandit_evaluations/demo_offline_doubly_robust.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(data.table) 3 | 4 | # Import myocardial infection dataset 5 | data <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv") 6 | 7 | simulations <- 1000 8 | horizon <- nrow(data) 9 | 10 | # arms always start at 1 11 | data$trt <- data$trt + 1 12 | 13 | # turn death into alive, making it a reward 14 | data$alive <- abs(data$death - 1) 15 | 16 | # Run regression per arm, predict outcomes, and save results, a column per arm 17 | f <- alive ~ age + risk + severity 18 | model_f <- function(arm) glm(f, data=data[trt==arm], 19 | family=binomial(link="logit"), 20 | y=FALSE, model=FALSE) 21 | arms <- sort(unique(data$trt)) 22 | model_arms <- lapply(arms, FUN = model_f) 23 | 24 | predict_arm <- function(model) predict(model, data, type = "response") 25 | r_data <- lapply(model_arms, FUN = predict_arm) 26 | r_data <- do.call(cbind, r_data) 27 | colnames(r_data) <- paste0("r", (1:max(arms))) 28 | 29 | # Bind data and model predictions 30 | data <- cbind(data,r_data) 31 | 32 | # calculate propensity weights 33 | m <- glm(I(trt-1) ~ age + risk + severity, 34 | data=data, family=binomial(link="logit")) 35 | data$p <- predict(m, type = "response") 36 | 37 | # formula notation of dataset: 38 | # (without p, doublyrobustbandit uses marginal prob per arm for propensities) 39 | f <- alive ~ trt | age + risk + severity | r1 + r2 | p 40 | 41 | bandit <- OfflineDoublyRobustBandit$new(formula = f, data = data) 42 | agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB")) 43 | simulation <- Simulator$new(agents, horizon, simulations) 44 | sim <- simulation$run() 45 | # plot the results 46 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright") 47 | -------------------------------------------------------------------------------- /demo/offline_bandit_evaluations/demo_offline_propensity_score.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(data.table) 3 | 4 | # Import myocardial infection dataset 5 | 6 | url <- "http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv" 7 | data <- fread(url) 8 | 9 | simulations <- 1000 10 | horizon <- nrow(data) 11 | 12 | # arms always start at 1 13 | data$trt <- data$trt + 1 14 | 15 | # turn death into alive, making it a reward 16 | data$alive <- abs(data$death - 1) 17 | 18 | # calculate propensity weights 19 | 20 | m <- glm(I(trt-1) ~ age + risk + severity, data=data, family=binomial(link="logit")) 21 | data$p <- predict(m, type = "response") 22 | 23 | # run bandit - when leaving out p, Propensity Bandit uses marginal prob per arm for propensities: 24 | # table(private$z)/length(private$z) 25 | 26 | f <- alive ~ trt | age + risk + severity | p 27 | 28 | bandit <- OfflinePropensityWeightingBandit$new(formula = f, data = data) 29 | 30 | # Define agents. 31 | agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB")) 32 | 33 | # Initialize the simulation. 
34 | 35 | simulation <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon) 36 | 37 | # Run the simulation. 38 | sim <- simulation$run() 39 | 40 | # plot the results 41 | plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright") 42 | 43 | -------------------------------------------------------------------------------- /demo/replication_eckles_kaptein_2014/demo_bootstrap_fig_2.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | # Replication of THOMPSON SAMPLING WITH THE ONLINE BOOTSTRAP By Dean Eckles and Maurits Kaptein 4 | 5 | # https://arxiv.org/abs/1410.4009 6 | 7 | # Fig 2. Empirical regret for Thompson sampling and BTS in a K-armed binomial bandit problem with 8 | # varied differences between the optimal arm and all others. 9 | 10 | bandit <- BasicBernoulliBandit$new(weights = c(0.5, rep(0.4,9))) 11 | 12 | agents <- list(Agent$new(BootstrapTSPolicy$new(1000), bandit, "BTS 1000"), 13 | Agent$new(ThompsonSamplingPolicy$new(), bandit, "TS")) 14 | 15 | simulator <- Simulator$new(agents = agents, 16 | do_parallel = TRUE, 17 | save_interval = 50, 18 | set_seed = 999, 19 | horizon = 1e+05, 20 | simulations = 1000) 21 | 22 | simulator$run() 23 | 24 | plot(simulator$history, log = "x") 25 | -------------------------------------------------------------------------------- /demo/replication_eckles_kaptein_2014/demo_bootstrap_fig_3.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | # Replication of THOMPSON SAMPLING WITH THE ONLINE BOOTSTRAP By Dean Eckles and Maurits Kaptein 4 | 5 | # https://arxiv.org/abs/1410.4009 6 | 7 | # Fig 3: Comparison of empirical regret for BTS with varied number of bootstrap replicates. 8 | 9 | # Sim completes within an hour on a 12 core server. 10 | 11 | bandit <- BasicBernoulliBandit$new(weights = c(0.5, rep(0.4,9))) 12 | 13 | agents <- list(Agent$new(BootstrapTSPolicy$new(10), bandit, "BTS 10"), 14 | Agent$new(BootstrapTSPolicy$new(100), bandit, "BTS 100"), 15 | Agent$new(BootstrapTSPolicy$new(1000), bandit, "BTS 1000"), 16 | Agent$new(BootstrapTSPolicy$new(10000), bandit, "BTS 10000")) 17 | 18 | simulator <- Simulator$new(agents = agents, 19 | do_parallel = TRUE, 20 | save_interval = 50, 21 | horizon = 1e+05, 22 | simulations = 1000) 23 | 24 | simulator$run() 25 | 26 | plot(simulator$history, log = "x") 27 | -------------------------------------------------------------------------------- /demo/replication_kruijswijk_2018/README.md: -------------------------------------------------------------------------------- 1 | # Bandits with dependent observations 2 | 3 | Code for replication plots of the paper "Exploiting Nested Data Structures in Multi-Armed Bandits" (submitted to PLOS One). 4 | 5 | Run file 2a and 2b to generate the plots for the simulation study. Do note that running these can take quite a while - especially for the partial pooling version for Thompson sampling - so use with care. 6 | 7 | Run file 3 to generate the plots for the empirical study using the supplied .csv file. 
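
For convenience, a minimal way to run these steps from a single R session could look like the sketch below. This is only an illustration, assuming the working directory is set to this folder and that the `contextual` package and the other dependencies loaded by the individual scripts are installed; the file pattern simply matches the "2a", "2b" and "3" scripts referred to above.

```r
# Sketch: source the replication scripts referred to above, in order.
library(contextual)

replication_scripts <- list.files(pattern = "^(2a|2b|3).*\\.R$")  # the 2a, 2b and 3 files
for (script in replication_scripts) {
  message("Running ", script)  # the 2a script in particular can take a long time
  source(script)
}
```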
8 | -------------------------------------------------------------------------------- /demo/replication_kruijswijk_2018/bandit_bootstrapped_replay.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | DependentObservationsBootstrappedBandit <- R6::R6Class( 3 | inherit = Bandit, 4 | class = FALSE, 5 | private = list( 6 | S = NULL, 7 | x = NULL, 8 | rows = NULL 9 | ), 10 | public = list( 11 | class_name = "DependentObservationsBootstrappedBandit", 12 | arm_multiply = NULL, 13 | initialize = function(offline_data, arms) { 14 | self$k <- arms 15 | self$d <- 1 16 | private$S <- offline_data 17 | if(!"context" %in% colnames(private$S)) private$S$context = list(1) 18 | private$S[is.null(context[[1]]),`:=`(context = list(1))] 19 | self$arm_multiply <- TRUE 20 | private$S <- do.call("rbind", replicate(self$k, private$S, simplify = FALSE)) 21 | private$rows <- nrow(private$S) # <- number of rows, used in get_context below 22 | }, 23 | post_initialization = function() { 24 | private$S <- private$S[sample(nrow(private$S),replace=TRUE)] 25 | private$x <- as.matrix(private$S$context) 26 | private$x <- apply(private$x, 2, jitter) 27 | }, 28 | get_context = function(index) { 29 | print(index) 30 | if(index > private$rows) return(NULL) # <- ... to stop once the bootstrapped data runs out 31 | context <- list( 32 | k = self$k, 33 | d = self$d, 34 | user_context = private$S$user[[index]], 35 | X = private$x[[index]] 36 | ) 37 | context 38 | }, 39 | get_reward = function(index, context, action) { 40 | reward_at_index <- as.double(private$S$reward[[index]]) 41 | if (private$S$choice[[index]] == action$choice) { 42 | list( 43 | reward = reward_at_index 44 | ) 45 | } else { 46 | NULL 47 | } 48 | } 49 | ) 50 | ) 51 | -------------------------------------------------------------------------------- /demo/replication_kruijswijk_2018/bandit_replay.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | DependentObservationsReplayBandit <- R6::R6Class( 3 | inherit = Bandit, 4 | class = FALSE, 5 | private = list( 6 | S = NULL 7 | ), 8 | public = list( 9 | class_name = "DependentObservationsReplayBandit", 10 | initialize = function(offline_data, arms) { 11 | self$k <- arms 12 | self$d <- 1 13 | private$S <- offline_data 14 | if(!"context" %in% colnames(private$S)) private$S$context = list(1) 15 | private$S[is.null(context[[1]]),`:=`(context = list(1))] 16 | }, 17 | post_initialization = function() { 18 | private$S <- private$S[sample(nrow(private$S))] 19 | }, 20 | get_context = function(index) { 21 | context <- list( 22 | k = self$k, 23 | d = self$d, 24 | user_context = private$S$user[[index]], 25 | X = private$S$context[[index]] 26 | ) 27 | context 28 | }, 29 | get_reward = function(index, context, action) { 30 | reward_at_index <- as.double(private$S$reward[[index]]) 31 | if (private$S$choice[[index]] == action$choice) { 32 | list( 33 | reward = reward_at_index 34 | ) 35 | } else { 36 | NULL 37 | } 38 | } 39 | ) 40 | ) 41 | -------------------------------------------------------------------------------- /demo/replication_kruijswijk_2018/beta_binom_hier_model.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int n_subjects; // items/subjects 3 | int n[n_subjects]; // total trials 4 | int l[n_subjects]; // total successes 5 | } 6 | 7 | parameters { 8 | real phi; // population chance of success 9 | real kappa; // population concentration 10 | vector[n_subjects] theta; // chance of success 11 | } 12 | 13 | model { 14 | kappa ~ pareto(1, 1.5); //
hyperprior 15 | theta ~ beta(phi * kappa, (1 - phi) * kappa); // prior 16 | l ~ binomial(n, theta); // likelihood 17 | } 18 | -------------------------------------------------------------------------------- /demo/replication_kruijswijk_2019/README.md: -------------------------------------------------------------------------------- 1 | # offline-parameter-tuning 2 | Code for the offline parameter tuning paper (submitted to IDA 2020) 3 | 4 | For the replications of the plots, see demo_lif_bandit.R and demo_tbl_bandit.R 5 | -------------------------------------------------------------------------------- /demo/replication_kruijswijk_2019/bandit_continuum_offon.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | OnlineOfflineContinuumBandit <- R6::R6Class( 3 | inherit = Bandit, 4 | class = FALSE, 5 | private = list( 6 | S = NULL 7 | ), 8 | public = list( 9 | class_name = "OnlineOfflineContinuumBandit", 10 | delta = NULL, 11 | horizon = NULL, 12 | choice = NULL, 13 | arm_function = NULL, 14 | initialize = function(FUN, delta, horizon) { 15 | self$arm_function <- FUN 16 | self$horizon <- horizon 17 | self$delta <- delta 18 | self$k <- 1 19 | }, 20 | post_initialization = function() { 21 | self$choice <- runif(self$horizon, min=0, max=1) 22 | private$S <- data.frame(self$choice, self$arm_function(self$choice)) 23 | private$S <- private$S[sample(nrow(private$S)),] 24 | colnames(private$S) <- c('choice', 'reward') 25 | }, 26 | get_context = function(index) { 27 | context <- list() 28 | context$k <- self$k 29 | context 30 | }, 31 | get_reward = function(index, context, action) { 32 | reward_at_index <- as.double(private$S$reward[[index]]) 33 | if (abs(private$S$choice[[index]] - action$choice) < self$delta) { 34 | reward <- list( 35 | reward = reward_at_index 36 | ) 37 | } else { 38 | NULL 39 | } 40 | } 41 | ) 42 | ) 43 | -------------------------------------------------------------------------------- /demo/replication_kruijswijk_2019/bandit_continuum_offon_kern.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | OnlineOfflineContinuumBanditKernel <- R6::R6Class( 3 | inherit = Bandit, 4 | class = FALSE, 5 | private = list( 6 | S = NULL, 7 | n = NULL 8 | ), 9 | public = list( 10 | class_name = "OnlineOfflineContinuumBanditKernel", 11 | delta = NULL, 12 | c1 = NULL, 13 | c2 = NULL, 14 | arm_function = NULL, 15 | choice = NULL, 16 | h = NULL, 17 | kernel = NULL, 18 | horizon = NULL, 19 | initialize = function(FUN, horizon) { 20 | self$arm_function <- FUN 21 | self$k <- 1 22 | self$horizon <- horizon 23 | self$h <- horizon^(-1/5) 24 | self$kernel <- function(action_true, action_choice, bandwith){ 1/sqrt(2*pi)*exp(-((action_choice - action_true) / bandwith)^2/2) } # Gaussian kernel (negative exponent, as in get_reward below) 25 | }, 26 | post_initialization = function() { 27 | self$choice <- runif(self$horizon, min=0, max=1) 28 | private$S <- data.frame(self$choice, self$arm_function(self$choice)) 29 | private$S <- private$S[sample(nrow(private$S)),] 30 | colnames(private$S) <- c('choice', 'reward') 31 | private$n <- 0 32 | }, 33 | get_context = function(index) { 34 | context <- list() 35 | context$k <- self$k 36 | context 37 | }, 38 | get_reward = function(index, context, action) { 39 | reward_at_index <- as.double(private$S$reward[[index]]) 40 | #kern_value <- self$kernel(action_true = private$S$choice[[index]], action_choice = action$choice, bandwith = self$h) 41 | temp_u <- (action$choice - private$S$choice[[index]]) / self$h 42 | kern_value <- 1/sqrt(2*pi) *
exp(-temp_u^2 / 2) 43 | #inc(private$n) <- 1 44 | #print(paste0("Kernel value: ", kern_value, "action choice: ", action$choice, "true action: ", private$S$choice[[index]], "divy: ", temp_u)) 45 | reward <- list( 46 | reward = (kern_value * reward_at_index), 47 | optimal_reward = self$c2 48 | ) 49 | } 50 | ) 51 | ) 52 | -------------------------------------------------------------------------------- /demo/replication_kruijswijk_2019/policy_tbl.R: -------------------------------------------------------------------------------- 1 | ThompsonBayesianLinearPolicy <- R6::R6Class( 2 | portable = FALSE, 3 | class = FALSE, 4 | inherit = Policy, 5 | public = list( 6 | class_name = "ThompsonBayesianLinearPolicy", 7 | J = NULL, 8 | P = NULL, 9 | err = NULL, 10 | initialize = function(J = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE), 11 | P = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE), 12 | err=1) { 13 | super$initialize() 14 | self$J <- J 15 | self$P <- P 16 | self$err <- err 17 | }, 18 | set_parameters = function(context_params) { 19 | self$theta <- list('J' = self$J, 'P' = self$P, 'err' = self$err) 20 | }, 21 | get_action = function(t, context) { 22 | sigma <- solve(self$theta$P, tol = 1e-200) 23 | mu <- sigma %*% matrix(self$theta$J) 24 | betas <- contextual::mvrnorm(n = 1, mu, sigma) 25 | action$choice <- -(betas[2] / (2*betas[3])) 26 | if(action$choice > 1){ 27 | action$choice <- 1 28 | } else if(action$choice < 0) { 29 | action$choice <- 0 30 | } 31 | action 32 | }, 33 | set_reward = function(t, context, action, reward) { 34 | y <- reward$reward 35 | x <- action$choice 36 | x <- matrix(c(1,x,x^2), nrow = 1, ncol = 3, byrow = TRUE) 37 | self$theta$J <- (x*y)/self$theta$err + self$theta$J 38 | self$theta$P <- t(x)%*%x + self$theta$P 39 | self$theta 40 | } 41 | ) 42 | ) 43 | -------------------------------------------------------------------------------- /demo/replication_li_2010/3_plotter.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(lobstr) 3 | 4 | # Config ----------------------------------------------------------------------------------------------------- 5 | 6 | load_file_name <- "Yahoo_T_2e+06_sparse_0.99.RData" 7 | 8 | # Setup ------------------------------------------------------------------------------------------------------ 9 | 10 | history <- History$new() 11 | 12 | # Take a look at the results --------------------------------------------------------------------------------- 13 | 14 | history$load_data_table(load_file_name) 15 | 16 | plot(history, regret = FALSE, rate = TRUE, type = "cumulative", legend_position = "bottomright", interval = 1000) 17 | -------------------------------------------------------------------------------- /demo/replication_li_2010/4_plotter.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | 4 | load_file_names <- list("D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.RData", 5 | "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.7.RData", 6 | "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.8.RData", 7 | "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.9.RData", 8 | "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.95.RData", 9 | "D:/results/Yahoo_T_4681992/Yahoo_T_4681992_sparse_0.99.RData") 10 | 11 | history <- History$new() 12 | 13 | ctr_list <- list() 14 | 15 | for(i in seq_along(load_file_names)) { 16 | history$load(load_file_names[[i]]) 17 | first_day_n <- 
floor(history$meta$sim$max_t) 18 | first_day_data <- history$get_cumulative_result(as_list = FALSE, t = first_day_n) 19 | ctr <- first_day_data$cum_reward_rate 20 | agents <- first_day_data$agent 21 | ctr_relative <- ctr / ctr[match("Random",agents)] 22 | ctr_relative <- ctr_relative[!ctr_relative==1] 23 | 24 | ctr_list[[i]] <- c(ctr_relative) 25 | } 26 | 27 | agents_relative <- agents[!agents=="Random"] 28 | 29 | all_ctr <- data.frame("100%" = ctr_list[[1]], "30%" = ctr_list[[2]], 30 | "20%" = ctr_list[[3]], "10%" = ctr_list[[4]], 31 | "5%" = ctr_list[[5]], "1%" = ctr_list[[6]], check.names = FALSE) 32 | 33 | omniscient <- 1.615 34 | par(mfrow = c(1, 1), mar = c(4, 4, 0.3, 0.1), cex=1.3) 35 | barplot(as.matrix(all_ctr), xpd = FALSE, beside=TRUE, legend = FALSE, 36 | ylab="ctr", las=1, xlab="data size", ylim = c(1,1.8)) 37 | abline(h=omniscient, col="gray", lwd=1, lty=2) 38 | barplot(as.matrix(all_ctr), xpd = FALSE,col=gray.colors(6), beside=TRUE, 39 | legend = agents_relative, args.legend = list(x = 'topright'), 40 | ylab="ctr", las=1, xlab="data size", ylim = c(1,1.8),add=TRUE) 41 | box(lwd=3) 42 | 43 | message("Plot completed") 44 | -------------------------------------------------------------------------------- /demo/replication_li_2010/alternative_db_scripts/2_run_the_simulation_on_monetdb.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | source("../dev.R") 3 | 4 | library(data.table) 5 | library(DBI) 6 | library(MonetDBLite) 7 | library(here) 8 | 9 | 10 | setwd(here::here("demo", "replication_li_2010")) 11 | 12 | source("yahoo_bandit.R") 13 | source("yahoo_policy_epsilon_greedy.R") 14 | 15 | # Connect to DB ---------------------------------------------------------------------------------------------- 16 | 17 | # monetdb.sequential=T is the difference between monetdblite life and death 18 | 19 | options(monetdb.sequential=T) 20 | 21 | db_dir <- "C:/YahooDb/yahoo.monetdblite" 22 | 23 | con <- dbConnect(MonetDBLite::MonetDBLite(), db_dir) 24 | 25 | print(paste0("MonetDBLite: connection to '",dbListTables(con),"' database succesful!")) 26 | 27 | # Config ----------------------------------------------------------------------------------------------------- 28 | 29 | simulations <- 1 30 | horizon <- 10000 31 | 32 | counted_rows <- as.integer(DBI::dbGetQuery(con, "SELECT COUNT(*) FROM yahoo" )) 33 | max_t <- as.integer(DBI::dbGetQuery(con, "SELECT max(t) FROM yahoo" )) 34 | 35 | print(counted_rows == max_t) 36 | 37 | # Get arm/article lookup 38 | 39 | arms_articles <- as.matrix(DBI::dbGetQuery(con, "SELECT DISTINCT article_id FROM yahoo")) 40 | class(arms_articles) <- "integer" 41 | 42 | # Initiate YahooBandit --------------------------------------------------------------------------------------- 43 | 44 | bandit <- YahooBandit$new(con, k = 217L, d = 36L, arm_lookup = arms_articles) 45 | 46 | agents <- 47 | list( 48 | Agent$new(YahooEpsilonGreedyPolicy$new(0.01), bandit, name = "EGreedy") 49 | ) 50 | 51 | # Define the simulation -------------------------------------------------------------------------------------- 52 | 53 | simulation <- 54 | Simulator$new( 55 | agents, 56 | simulations = simulations, 57 | horizon = horizon, 58 | do_parallel = FALSE, 59 | write_progress_file = TRUE, 60 | include_packages = c("DBI","MonetDBLite") 61 | ) 62 | 63 | # Run the simulation 64 | 65 | sim <- simulation$run() 66 | 67 | # Take a look at the results --------------------------------------------------------------------------------- 68 | 69 | 
print(sim$meta$sim_total_duration) 70 | 71 | plot(sim, regret = FALSE, rate = FALSE, type = "cumulative") 72 | 73 | df <- sim$get_data_frame() 74 | 75 | dbDisconnect(con, shutdown = TRUE) 76 | 77 | -------------------------------------------------------------------------------- /demo/replication_li_2010/alternative_db_scripts/2_run_the_simulation_on_monetdb_lite.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | library(data.table) 4 | library(DBI) 5 | library(MonetDBLite) 6 | library(here) 7 | 8 | setwd(here::here("demo", "replication_li_2010")) 9 | 10 | source("yahoo_bandit.R") 11 | source("yahoo_policy_epsilon_greedy.R") 12 | 13 | # Connect to DB ---------------------------------------------------------------------------------------------- 14 | 15 | # monetdb.sequential=T is the difference between monetdblite life and death 16 | 17 | options(monetdb.sequential=T) 18 | 19 | db_dir <- "C:/YahooDb/yahoo.monetdblite" 20 | 21 | con <- dbConnect(MonetDBLite::MonetDBLite(), db_dir) 22 | 23 | print(paste0("MonetDBLite: connection to '",dbListTables(con),"' database succesful!")) 24 | 25 | # Config ----------------------------------------------------------------------------------------------------- 26 | 27 | simulations <- 1 28 | horizon <- 10000 29 | 30 | counted_rows <- as.integer(DBI::dbGetQuery(con, "SELECT COUNT(*) FROM yahoo" )) 31 | max_t <- as.integer(DBI::dbGetQuery(con, "SELECT max(t) FROM yahoo" )) 32 | 33 | print(counted_rows == max_t) 34 | 35 | # Get arm/article lookup 36 | 37 | arms_articles <- as.matrix(DBI::dbGetQuery(con, "SELECT DISTINCT article_id FROM yahoo")) 38 | class(arms_articles) <- "integer" 39 | 40 | # Initiate YahooBandit --------------------------------------------------------------------------------------- 41 | 42 | bandit <- YahooBandit$new(con, k = 217L, d = 36L, arm_lookup = arms_articles) 43 | 44 | agents <- 45 | list( 46 | Agent$new(YahooEpsilonGreedyPolicy$new(0.01), bandit, name = "EGreedy") 47 | ) 48 | 49 | # Define the simulation -------------------------------------------------------------------------------------- 50 | 51 | simulation <- 52 | Simulator$new( 53 | agents, 54 | simulations = simulations, 55 | horizon = horizon, 56 | do_parallel = FALSE, 57 | write_progress_file = TRUE, 58 | include_packages = c("DBI","MonetDBLite") 59 | ) 60 | 61 | # Run the simulation 62 | 63 | sim <- simulation$run() 64 | 65 | # Take a look at the results --------------------------------------------------------------------------------- 66 | 67 | print(sim$meta$sim_total_duration) 68 | 69 | plot(sim, regret = FALSE, rate = FALSE, type = "cumulative") 70 | 71 | df <- sim$get_data_frame() 72 | 73 | dbDisconnect(con, shutdown = TRUE) 74 | 75 | -------------------------------------------------------------------------------- /demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_epsilon_greedy.R: -------------------------------------------------------------------------------- 1 | YahooEpsilonGreedyPolicy <- R6::R6Class( 2 | portable = FALSE, 3 | class = FALSE, 4 | inherit = Policy, 5 | public = list( 6 | epsilon = NULL, 7 | class_name = "YahooEpsilonGreedyPolicy", 8 | initialize = function(epsilon = 0.1) { 9 | super$initialize() 10 | self$epsilon <- epsilon 11 | }, 12 | set_parameters = function(context_params) { 13 | self$theta_to_arms <- list('n' = 0, 'mean' = 0) 14 | }, 15 | get_action = function(t, context) { 16 | if (runif(1) > self$epsilon) { 17 | max_index <- 
context$arms[which_max_list(self$theta$mean[context$arms])] 18 | self$action$choice <- max_index 19 | } else { 20 | self$action$choice <- sample(context$arms, 1) 21 | } 22 | self$action 23 | }, 24 | set_reward = function(t, context, action, reward) { 25 | 26 | arm <- action$choice 27 | reward <- reward$reward 28 | 29 | self$theta$n[[arm]] <- self$theta$n[[arm]] + 1 30 | self$theta$mean[[arm]] <- self$theta$mean[[arm]] + (reward - self$theta$mean[[arm]]) / self$theta$n[[arm]] 31 | 32 | self$theta 33 | } 34 | ) 35 | ) 36 | -------------------------------------------------------------------------------- /demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_epsilon_greedy_seg.R: -------------------------------------------------------------------------------- 1 | YahooEpsilonGreedySegPolicy <- R6::R6Class( 2 | portable = FALSE, 3 | class = FALSE, 4 | inherit = Policy, 5 | public = list( 6 | epsilon = NULL, 7 | cluster = NULL, 8 | class_name = "YahooEpsilonGreedySegPolicy", 9 | initialize = function(epsilon = 0.1) { 10 | super$initialize() 11 | self$epsilon <- epsilon 12 | }, 13 | set_parameters = function(context_params) { 14 | self$theta_to_arms <- list('n' = rep(0,5), 'mean' = rep(0,5)) 15 | }, 16 | get_action = function(t, context) { 17 | local_arms <- context$arms 18 | if (runif(1) > self$epsilon) { 19 | # find the feature on which a user scores highest - that is this user's cluster 20 | self$cluster <- which.max(head(context$X[context$unique,1],-1)) 21 | expected_rewards <- rep(0.0, length(local_arms)) 22 | for (arm in seq_along(local_arms)) { 23 | expected_rewards[arm] <- self$theta$mean[[local_arms[arm]]][self$cluster] 24 | } 25 | action$choice <- local_arms[which_max_tied(expected_rewards)] 26 | } else { 27 | action$choice <- sample(local_arms, 1) 28 | } 29 | action 30 | }, 31 | set_reward = function(t, context, action, reward) { 32 | arm <- action$choice 33 | reward <- reward$reward 34 | self$theta$n[[arm]][self$cluster] <- self$theta$n[[arm]][self$cluster] + 1 35 | self$theta$mean[[arm]][self$cluster] <- self$theta$mean[[arm]][self$cluster] + 36 | (reward - self$theta$mean[[arm]][self$cluster]) / 37 | self$theta$n[[arm]][self$cluster] 38 | self$theta 39 | } 40 | ) 41 | ) 42 | -------------------------------------------------------------------------------- /demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_linucb_disjoint.R: -------------------------------------------------------------------------------- 1 | YahooLinUCBDisjointPolicy <- R6::R6Class( 2 | portable = FALSE, 3 | class = FALSE, 4 | inherit = Policy, 5 | public = list( 6 | alpha = NULL, 7 | class_name = "YahooLinUCBDisjointPolicy", 8 | initialize = function(alpha = 0.2) { 9 | super$initialize() 10 | self$alpha <- alpha 11 | }, 12 | set_parameters = function(context_params) { 13 | ul <- length(context_params$unique) 14 | 15 | self$theta_to_arms <- list( 'A' = diag(1,ul,ul), 'b' = rep(0,ul), 16 | 'A_inv' = solve(diag(1,ul,ul))) 17 | }, 18 | get_action = function(t, context) { 19 | 20 | expected_rewards <- rep(0.0, length(context$arms)) 21 | local_arms <- context$arms 22 | for (arm in seq_along(local_arms)) { 23 | 24 | x <- context$X[context$unique,arm] 25 | A <- self$theta$A[[local_arms[arm]]] 26 | A_inv <- self$theta$A_inv[[local_arms[arm]]] 27 | b <- self$theta$b[[local_arms[arm]]] 28 | theta_hat <- A_inv %*% b 29 | mean <- x %*% theta_hat 30 | sd <- sqrt(tcrossprod(x %*% A_inv, x)) 31 | expected_rewards[arm] <- mean + self$alpha * sd 32 | } 33 | action$choice <- context$arms[which_max_tied(expected_rewards)] 34 | 
35 | action 36 | }, 37 | set_reward = function(t, context, action, reward) { 38 | 39 | arm <- action$choice 40 | arm_index <- which(context$arms == arm) 41 | reward <- reward$reward 42 | x <- context$X[context$unique,arm_index] 43 | A_inv <- self$theta$A_inv[[arm]] 44 | self$theta$A_inv[[arm]] <- sherman_morrisson(self$theta$A_inv[[arm]],x) 45 | self$theta$A[[arm]] <- self$theta$A[[arm]] + outer(x, x) 46 | self$theta$b[[arm]] <- self$theta$b[[arm]] + reward * x 47 | 48 | self$theta 49 | } 50 | ) 51 | ) 52 | -------------------------------------------------------------------------------- /demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_random.R: -------------------------------------------------------------------------------- 1 | YahooRandomPolicy <- R6::R6Class( 2 | portable = FALSE, 3 | class = FALSE, 4 | inherit = Policy, 5 | public = list( 6 | class_name = "YahooRandomPolicy", 7 | initialize = function() { 8 | super$initialize() 9 | }, 10 | set_parameters = function(context_params) { 11 | self$theta_to_arms <- list('n' = 0, 'mean' = 0) 12 | }, 13 | get_action = function(t, context) { 14 | action$choice <- sample(context$arms, 1) 15 | action 16 | }, 17 | set_reward = function(t, context, action, reward) { 18 | arm <- action$choice 19 | reward <- reward$reward 20 | inc(self$theta$n[[arm]]) <- 1 21 | inc(self$theta$mean[[arm]]) <- (reward - self$theta$mean[[arm]]) / self$theta$n[[arm]] 22 | self$theta 23 | } 24 | ) 25 | ) 26 | -------------------------------------------------------------------------------- /demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_ucb1_alpha.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | YahooUCB1AlphaPolicy <- R6::R6Class( 3 | portable = FALSE, 4 | class = FALSE, 5 | inherit = Policy, 6 | public = list( 7 | alpha = NULL, 8 | class_name = "YahooUCB1AlphaPolicy", 9 | initialize = function(alpha) { 10 | super$initialize() 11 | self$alpha <- alpha 12 | }, 13 | set_parameters = function(context_params) { 14 | self$theta_to_arms <- list('n' = 0, 'mean' = 0) 15 | }, 16 | get_action = function(t, context) { 17 | local_arms <- context$arms 18 | n_zero_arms <- which(self$theta$n[local_arms] == 0) 19 | if (length(n_zero_arms) > 0) { 20 | action$choice <- local_arms[sample_one_of(n_zero_arms)] 21 | return(action) 22 | } 23 | expected_rewards <- rep(0.0, length(context$arms)) 24 | for (arm in seq_along(local_arms)) { 25 | # usb1 variance as in Li 2010 paper 26 | variance <- self$alpha / sqrt( self$theta$n[[local_arms[arm]]] ) 27 | expected_rewards[arm] <- self$theta$mean[[local_arms[arm]]] + variance 28 | } 29 | action$choice <- local_arms[which_max_tied(expected_rewards)] 30 | action 31 | }, 32 | set_reward = function(t, context, action, reward) { 33 | 34 | arm <- action$choice 35 | reward <- reward$reward 36 | self$theta$n[[arm]] <- self$theta$n[[arm]] + 1 37 | self$theta$mean[[arm]] <- self$theta$mean[[arm]] + (reward - self$theta$mean[[arm]]) / self$theta$n[[arm]] 38 | 39 | self$theta 40 | } 41 | ) 42 | ) 43 | -------------------------------------------------------------------------------- /demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_ucb1_alpha_seg.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | YahooUCB1AlphaSegPolicy <- R6::R6Class( 3 | portable = FALSE, 4 | class = FALSE, 5 | inherit = Policy, 6 | public = list( 7 | alpha = NULL, 8 | cluster = NULL, 9 | class_name = "YahooUCB1AlphaSegPolicy", 10 | initialize = function(alpha) { 11 | 
super$initialize() 12 | self$alpha <- alpha 13 | }, 14 | set_parameters = function(context_params) { 15 | self$theta_to_arms <- list('n' = rep(0,5), 'mean' = rep(0,5)) 16 | }, 17 | get_action = function(t, context) { 18 | # find the feature on which a user scores highest - that is this user's cluster 19 | self$cluster <- which.max(head(context$X[context$unique,1],-1)) 20 | local_arms <- context$arms 21 | for (arm in seq_along(local_arms)) { 22 | if(self$theta$n[[local_arms[arm]]][self$cluster] == 0) { 23 | action$choice <- local_arms[arm] 24 | return(action) 25 | } 26 | } 27 | expected_rewards <- rep(0.0, length(local_arms)) 28 | for (arm in seq_along(local_arms)) { 29 | variance <- self$alpha / sqrt( self$theta$n[[local_arms[arm]]][self$cluster] ) 30 | expected_rewards[arm] <- self$theta$mean[[local_arms[arm]]][self$cluster] + variance 31 | } 32 | action$choice <- local_arms[which_max_tied(expected_rewards)] 33 | action 34 | }, 35 | set_reward = function(t, context, action, reward) { 36 | 37 | arm <- action$choice 38 | reward <- reward$reward 39 | self$theta$n[[arm]][self$cluster] <- self$theta$n[[arm]][self$cluster] + 1 40 | self$theta$mean[[arm]][self$cluster] <- self$theta$mean[[arm]][self$cluster] + 41 | (reward - self$theta$mean[[arm]][self$cluster]) / 42 | self$theta$n[[arm]][self$cluster] 43 | self$theta 44 | } 45 | ) 46 | ) 47 | -------------------------------------------------------------------------------- /demo/replication_li_2010/demo_yahoo_exploration/plots.R: -------------------------------------------------------------------------------- 1 | library(DBI) 2 | library(MonetDBLite) 3 | library(ggplot2) 4 | 5 | con <- DBI::dbConnect(MonetDB.R(), host="monetdb_ip", dbname="yahoo", user="monetdb", password="monetdb") 6 | print(paste0("MonetDB: connection to '",dbListTables(con),"' database succesful!")) 7 | 8 | times <- dbGetQuery(con, "SELECT timestamped, COUNT(timestamped) FROM yahoo GROUP BY timestamped") 9 | names(times) <- c('timestamped', 'count') 10 | times$timestamped <- as.POSIXct(times$timestamped, origin = "1970-01-01") 11 | 12 | # Traffic ---------------------------------------------------------------------------------------------------- 13 | 14 | times <- dbGetQuery(con, "SELECT timestamped, COUNT(timestamped) FROM yahoo GROUP BY timestamped") 15 | names(times) <- c('timestamped', 'count') 16 | times$timestamped <- as.POSIXct(times$timestamped, origin = "1970-01-01") 17 | ggplot(times, aes(timestamped, count)) + geom_line() + ggtitle("Traffic") 18 | 19 | # CTR over time ---------------------------------------------------------------------------------------------- 20 | 21 | ctr <- 22 | dbGetQuery(con, "SELECT timestamped, AVG(click) FROM yahoo GROUP BY timestamped") 23 | names(ctr) <- c('timestamped', 'ctr') 24 | ctr$timestamped <- as.POSIXct(ctr$timestamped, origin = "1970-01-01") 25 | ggplot(ctr, aes(timestamped, ctr)) + geom_line() + ggtitle("CTR") 26 | 27 | # clickthrough rates, no context, no cluster ----------------------------------------------------------------- 28 | 29 | ctrs <- dbGetQuery(con, 'SELECT article_id, AVG(click) as ctr from yahoo GROUP BY article_id ORDER BY ctr') 30 | 31 | barplot(ctrs$ctr, names.arg=ctrs$article_id, ylim=c(0,0.1)) 32 | 33 | # top 5 34 | 35 | barplot(tail(ctrs$ctr,5), names.arg=tail(ctrs$article_id,5), ylim=c(0,0.1)) 36 | 37 | # worst 10 38 | 39 | barplot(head(ctrs$ctr,5), names.arg=head(ctrs$article_id,5), ylim=c(0,0.1)) 40 | 41 | # disconnect from and then shutdown DB 
----------------------------------------------------------------------- 42 | 43 | dbDisconnect(con, shutdown = TRUE) 44 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_2_3.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | bandit <- ContextualBernoulliBandit$new(matrix(c(0.5, 0.2, 0.1), 1)) 4 | policy <- EpsilonGreedyPolicy$new(0.1) 5 | agent <- Agent$new(policy,bandit) 6 | sim <- Simulator$new(agent, simulations = 10000, horizon = 100) 7 | history <- sim$run() 8 | 9 | summary(history) 10 | 11 | par(mfrow = c(1, 1), mar = c(4, 4, 0.5, 1), cex=1.3) 12 | plot(history, type = "arms", no_par = TRUE) 13 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_3_2_1.R: -------------------------------------------------------------------------------- 1 | # Load and attach the contextual package. 2 | library(contextual) 3 | # Define for how long the simulation will run. 4 | horizon <- 400 5 | # Define how many times to repeat the simulation. 6 | simulations <- 10000 7 | # Define the probability that each ad will be clicked. 8 | click_probabilities <- matrix(c(0.6, 0.4, 0.2), nrow = 1, ncol = 3, byrow = TRUE) 9 | # Initialize a ContextualBernoulliBandit, which takes click probabilities per arm as an argument. 10 | bandit <- ContextualBernoulliBandit$new(weights = click_probabilities) 11 | # Initialize EpsilonGreedyPolicy with a 40% exploration rate. 12 | eg_policy <- EpsilonGreedyPolicy$new(epsilon = 0.4) 13 | # Initialize EpsilonFirstPolicy with a .25 x 400 = 100 step exploration period. 14 | ef_policy <- EpsilonFirstPolicy$new(epsilon = 0.25, N = horizon) 15 | # Initialize two Agents, binding each policy to a bandit. 16 | ef_agent <- Agent$new(ef_policy, bandit) 17 | eg_agent <- Agent$new(eg_policy, bandit) 18 | # Assign both agents to a list. 19 | agents <- list(ef_agent, eg_agent) 20 | # Initialize a Simulator with the agent list, horizon, and number of simulations. 21 | simulator <- Simulator$new(agents, horizon, simulations, do_parallel = TRUE) 22 | # Now run the simulator. 23 | history <- simulator$run() 24 | # Finally, plot the average reward per time step t 25 | par(mfrow = c(1, 2), mar = c(2,4,1,1), cex=1.4) 26 | plot(history, type = "average", regret = FALSE, no_par = TRUE, legend_border = FALSE, legend_position = "bottomright") 27 | # And the cumulative reward rate (which equals the Click Through Rate). 28 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, no_par = TRUE, legend_border = FALSE, legend_position = "bottomright") 29 | par(mfrow = c(1, 1)) 30 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_3_2_2.R: -------------------------------------------------------------------------------- 1 | # Load and attach the contextual package. 2 | library(contextual) 3 | # Define for how long the simulation will run. 4 | horizon <- 400 5 | # Define how many times to repeat the simulation. 6 | simulations <- 10000 7 | # Define the probability that each ad will be clicked. 8 | click_probabilities <- matrix(c(0.6, 0.4, 0.2), nrow = 1, ncol = 3, byrow = TRUE) 9 | # Initialize a ContextualBernoulliBandit, which takes click probabilities per arm as an argument. 10 | bandit <- ContextualBernoulliBandit$new(weights = click_probabilities) 11 | # Initialize EpsilonGreedyPolicy with a 40% exploration rate.
12 | eg_policy <- EpsilonGreedyPolicy$new(epsilon = 0.4) 13 | # Initialize EpsilonFirstPolicy with a .25 x 400 = 100 step exploration period. 14 | ef_policy <- EpsilonFirstPolicy$new(epsilon = 0.25, N = horizon) 15 | # Initialize two Agents, binding each policy to a bandit. 16 | ef_agent <- Agent$new(ef_policy, bandit) 17 | eg_agent <- Agent$new(eg_policy, bandit) 18 | # Both agents will be added to the agent list further below. 19 | 20 | ################################################################################################## 21 | # +-----+----+-----------> arms: three ads 22 | # | | | 23 | click_probs <- matrix(c(0.5, 0.7, 0.1, # -> context 1: older (p=.5) 24 | 0.7, 0.1, 0.3), # -> context 2: young (p=.5) 25 | 26 | nrow = 2, ncol = 3, byrow = TRUE) 27 | 28 | # Initialize a ContextualBernoulliBandit with contextual weights 29 | context_bandit <- ContextualBernoulliBandit$new(weights = click_probs) 30 | # Initialize LinUCBDisjointPolicy 31 | lucb_policy <- LinUCBDisjointPolicy$new(0.6) 32 | # Initialize three Agents, binding each policy to a bandit. 33 | ef_agent <- Agent$new(ef_policy, context_bandit) 34 | eg_agent <- Agent$new(eg_policy, context_bandit) 35 | lucb_agent <- Agent$new(lucb_policy, context_bandit) 36 | # Assign all agents to a list. 37 | agents <- list(ef_agent, eg_agent, lucb_agent) 38 | # Initialize a Simulator with the agent list, horizon, and number of simulations 39 | simulator <- Simulator$new(agents, horizon, simulations) 40 | # Now run the simulator. 41 | history <- simulator$run() 42 | 43 | par(mfrow = c(1, 2), mar = c(2,4,1,1) , cex=1.4) 44 | # Finally, plot the average reward per time step t 45 | plot(history, type = "average", regret = FALSE, no_par = TRUE, legend_border = FALSE, legend_position = "bottomright") 46 | # And the cumulative reward rate (which equals the Click Through Rate). 47 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, no_par = TRUE, legend_border = FALSE, legend_position = "bottomright") 48 | par(mfrow = c(1, 1)) 49 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_4_2_plot.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | bandit <- ContextualBernoulliBandit$new(weights = matrix(c(0.7, 0.2, 0.2),1,3)) 4 | 5 | agents <- list(Agent$new(RandomPolicy$new(), bandit), 6 | Agent$new(OraclePolicy$new(), bandit), 7 | Agent$new(ThompsonSamplingPolicy$new(1.0, 1.0), bandit), 8 | Agent$new(Exp3Policy$new(0.1), bandit), 9 | Agent$new(GittinsBrezziLaiPolicy$new(), bandit), 10 | Agent$new(UCB1Policy$new(), bandit)) 11 | 12 | history <- Simulator$new(agents, horizon = 100, simulations = 300)$run() 13 | 14 | par(mfrow = c(3, 2), mar = c(1, 4, 2, 1), cex=1.3) #bottom, left, top, and right.
15 | plot(history, type = "cumulative", use_colors = FALSE, no_par = TRUE, legend_border = FALSE, 16 | limit_agents = c("GittinsBrezziLai", "UCB1","ThompsonSampling")) 17 | 18 | plot(history, type = "cumulative", regret = FALSE, legend = FALSE, 19 | limit_agents = c("Exp3"), traces = TRUE, no_par = TRUE) 20 | 21 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, disp = "sd", 22 | limit_agents = c("Exp3", "ThompsonSampling"), legend_border = FALSE, 23 | legend_position = "bottomright", no_par = TRUE) 24 | 25 | plot(history, type = "cumulative", rate = TRUE, plot_only_disp = TRUE, 26 | disp = "var", smooth = TRUE, limit_agents = c("Exp3", "ThompsonSampling"), 27 | legend_border = FALSE, legend_position = "bottomright", no_par = TRUE) 28 | 29 | plot(history, type = "average", disp = "ci", regret = FALSE, interval = 10, 30 | smooth = TRUE, legend_position = "bottomright", no_par = TRUE, legend = FALSE) 31 | 32 | plot(history, limit_agents = c("ThompsonSampling"), type = "arms", 33 | interval = 20, no_par = TRUE) 34 | 35 | par(mfrow = c(1, 1)) 36 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_5_2.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | horizon <- 100 4 | simulations <- 1000 5 | weights <- matrix(c(0.6, 0.2, 0.2), 1, 3) 6 | 7 | policy <- EpsilonFirstPolicy$new(epsilon = 0.5, N = horizon) 8 | bandit <- ContextualBernoulliBandit$new(weights = weights) 9 | 10 | agent <- Agent$new(policy,bandit) 11 | 12 | simulator <- Simulator$new(agents = agent, 13 | horizon = horizon, 14 | simulations = simulations) 15 | 16 | history <- simulator$run() 17 | 18 | par(mfrow = c(1, 2), mar = c(2, 4, 1, 1), cex=1.4) #bottom, left, top, and right. 19 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE, legend_position = "bottomright") 20 | plot(history, type = "arms", no_par = TRUE) 21 | par(mfrow = c(1, 1)) 22 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_5_3.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | horizon <- 100 4 | simulations <- 1000 5 | weights <- matrix(c(0.8, 0.2, 0.2), 1, 3) 6 | 7 | policy <- EpsilonGreedyPolicy$new(epsilon = 0.1) 8 | bandit <- ContextualBernoulliBandit$new(weights = weights) 9 | 10 | agent <- Agent$new(policy,bandit) 11 | 12 | simulator <- Simulator$new(agents = agent, 13 | horizon = horizon, 14 | simulations = simulations) 15 | 16 | history <- simulator$run() 17 | 18 | par(mfrow = c(1, 2), mar = c(2, 4, 1, 1), cex=1.4) #bottom, left, top, and right. 
19 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE) 20 | plot(history, type = "arms", no_par = TRUE) 21 | par(mfrow = c(1, 1)) 22 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_5_4.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | horizon <- 100L 4 | simulations <- 1000L 5 | 6 | # k=1 k=2 k=3 -> columns represent arms 7 | weights <- matrix(c(0.6, 0.2, 0.2, # d=1 -> rows represent 8 | 0.2, 0.6, 0.2, # d=2 context features, 9 | 0.2, 0.2, 0.6), # d=3 10 | 11 | nrow = 3, ncol = 3, byrow = TRUE) 12 | 13 | bandit <- ContextualBernoulliBandit$new(weights = weights) 14 | 15 | eg_policy <- EpsilonGreedyPolicy$new(0.1) 16 | lucb_policy <- LinUCBDisjointPolicy$new(0.6) 17 | 18 | agents <- list(Agent$new(eg_policy, bandit, "EGreedy"), 19 | Agent$new(lucb_policy, bandit, "LinUCB")) 20 | 21 | simulation <- Simulator$new(agents, horizon, simulations, save_context = TRUE) 22 | history <- simulation$run() 23 | 24 | par(mfrow = c(2, 3), mar = c(2, 4, 1, 0.1), cex=1.3) #bottom, left, top, and right. 25 | 26 | plot(history, type = "cumulative", legend_border = FALSE, no_par = TRUE ) 27 | plot(history, type = "arms", limit_agents = c("LinUCB"), no_par = TRUE) 28 | plot(history, type = "arms", limit_agents = c("EGreedy"), no_par = TRUE) 29 | 30 | plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.1"), no_par = TRUE) 31 | plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.2"), no_par = TRUE ) 32 | plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.3"), no_par = TRUE ) 33 | 34 | par(mfrow = c(1, 1)) 35 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_6.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | 3 | BasicPoissonBandit <- R6::R6Class( 4 | inherit = BasicBernoulliBandit, 5 | class = FALSE, 6 | public = list( 7 | weights = NULL, 8 | class_name = "BasicPoissonBandit", 9 | # Override get_reward & generate Poisson based rewards 10 | get_reward = function(t, context, action) { 11 | reward_means = rep(2,self$k) 12 | rpm <- rpois(self$k, reward_means) 13 | rewards <- matrix(rpm < self$weights, self$k, 1)*1 14 | optimal_arm <- which_max_tied(self$weights) 15 | reward <- list( 16 | reward = rewards[action$choice], 17 | optimal_arm = optimal_arm, 18 | optimal_reward = rewards[optimal_arm] 19 | ) 20 | } 21 | ) 22 | ) 23 | 24 | EpsilonGreedyAnnealingPolicy <- R6::R6Class( 25 | # Class extends EpsilonGreedyPolicy 26 | inherit = EpsilonGreedyPolicy, 27 | portable = FALSE, 28 | public = list( 29 | class_name = "EpsilonGreedyAnnealingPolicy", 30 | # Override EpsilonGreedyPolicy's get_action, use annealing epsilon 31 | get_action = function(t, context) { 32 | self$epsilon <- 1/(log(100*t+0.001)) 33 | super$get_action(t, context) 34 | } 35 | ) 36 | ) 37 | 38 | weights <- c(7,1,2) 39 | horizon <- 200 40 | simulations <- 1000 41 | bandit <- BasicPoissonBandit$new(weights) 42 | ega_policy <- EpsilonGreedyAnnealingPolicy$new() 43 | eg_policy <- EpsilonGreedyPolicy$new(0.2) 44 | agents <- list(Agent$new(ega_policy, bandit, "EG Annealing"), 45 | Agent$new(eg_policy, bandit, "EG")) 46 | simulation <- Simulator$new(agents, horizon, simulations, do_parallel = FALSE) 47 | history <- simulation$run() 48 | 49 | 50 | par(mfrow = c(1, 3), mar = c(2, 4, 1, 0.1), 
cex=1.3) #bottom, left, top, and right. 51 | 52 | 53 | plot(history, type = "cumulative", no_par = TRUE, legend_border = FALSE, 54 | legend_position = "bottomright") 55 | plot(history, type = "arms", limit_agents = c("EG"), no_par = TRUE, 56 | interval = 25) 57 | plot(history, type = "arms", limit_agents = c("EG Annealing"), no_par = TRUE, 58 | interval = 25) 59 | 60 | par(mfrow = c(1, 1)) 61 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_7.R: -------------------------------------------------------------------------------- 1 | library(contextual) 2 | library(data.table) 3 | 4 | # Define Replay Bandit 5 | OfflineReplayEvaluatorBandit <- R6::R6Class( 6 | inherit = Bandit, 7 | private = list( 8 | S = NULL 9 | ), 10 | public = list( 11 | class_name = "OfflineReplayEvaluatorBandit", 12 | initialize = function(offline_data, k, d) { 13 | self$k <- k # Number of arms 14 | self$d <- d # Context feature vector dimensions 15 | private$S <- offline_data # Logged events 16 | }, 17 | get_context = function(index) { 18 | context <- list( 19 | k = self$k, 20 | d = self$d, 21 | X = private$S$context[[index]] 22 | ) 23 | context 24 | }, 25 | get_reward = function(index, context, action) { 26 | if (private$S$choice[[index]] == action$choice) { 27 | list( 28 | reward = as.double(private$S$reward[[index]]) 29 | ) 30 | } else { 31 | NULL 32 | } 33 | } 34 | ) 35 | ) 36 | 37 | # Import personalization data-set 38 | url <- "http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/dataset.txt" 39 | datafile <- fread(url) 40 | 41 | # Clean up datafile 42 | datafile[, context := as.list(as.data.frame(t(datafile[, 3:102])))] 43 | datafile[, (3:102) := NULL] 44 | datafile[, t := .I] 45 | datafile[, sim := 1] 46 | datafile[, agent := "linucb"] 47 | setnames(datafile, c("V1", "V2"), c("choice", "reward")) 48 | 49 | # Set simulation parameters. 50 | simulations <- 1 51 | horizon <- nrow(datafile) 52 | 53 | # Initiate Replay bandit with 10 arms and 100 context dimensions 54 | log_S <- datafile 55 | bandit <- OfflineReplayEvaluatorBandit$new(log_S, k = 10, d = 100) 56 | 57 | # Define agents. 58 | agents <- 59 | list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.01), bandit, "alpha = 0.01"), 60 | Agent$new(LinUCBDisjointOptimizedPolicy$new(0.05), bandit, "alpha = 0.05"), 61 | Agent$new(LinUCBDisjointOptimizedPolicy$new(0.1), bandit, "alpha = 0.1"), 62 | Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit, "alpha = 1.0")) 63 | 64 | # Initialize the simulation. 65 | simulation <- 66 | Simulator$new( 67 | agents = agents, 68 | simulations = simulations, 69 | horizon = horizon, 70 | save_context = TRUE 71 | ) 72 | 73 | # Run the simulation. 74 | linucb_sim <- simulation$run() 75 | 76 | # plot the results 77 | par(mfrow = c(1, 1), mar = c(4, 4, 0.5, 1), cex=1.3) 78 | plot(linucb_sim, type = "cumulative", regret = FALSE, legend_title = "LinUCB", 79 | rate = TRUE, legend_position = "bottomright") 80 | -------------------------------------------------------------------------------- /demo/replication_van_emden_2018/section_8.R: -------------------------------------------------------------------------------- 1 | # See the demo/replication_li_2010 directory. 
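# The numbered scripts in that directory walk through the full replication: loading the
# Yahoo! data into a database, running the simulation on it, and plotting the results
# (see, for instance, 3_plotter.R and 4_plotter.R for the plotting step).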
-------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/all_cmab_phases_Part1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part1.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/all_cmab_phases_Part2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part2.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/all_cmab_phases_Part3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part3.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/all_cmab_phases_Part4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part4.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/all_cmab_phases_Part5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part5.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/all_cmab_phases_Part6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part6.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/all_cmab_phases_Part7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part7.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/all_cmab_phases_Part8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/all_cmab_phases_Part8.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/cmab_chart.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/cmab_chart.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/contextual_class.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/contextual_class.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/contextual_sequence.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/contextual_sequence.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/offline_bandit.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/offline_bandit.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_2_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_2_3.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_3_2_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_3_2_1.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_3_2_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_3_2_2.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_4_2_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_4_2_plot.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_5_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_5_2.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_5_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_5_3.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_5_4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_5_4.pdf 
-------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_5_5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_5_5.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_8_bar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_8_bar.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/fig/section_8_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/fig/section_8_plot.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/jss.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/jss.pdf -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/jss.synctex.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/jss.synctex.gz -------------------------------------------------------------------------------- /docs/articles/arxiv_2018/jsslogo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/arxiv_2018/jsslogo.jpg -------------------------------------------------------------------------------- /docs/articles/basic_epsilon_greedy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/basic_epsilon_greedy.jpeg -------------------------------------------------------------------------------- /docs/articles/basic_epsilon_greedy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/basic_epsilon_greedy.jpg -------------------------------------------------------------------------------- /docs/articles/basic_epsilon_greedy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/basic_epsilon_greedy.png -------------------------------------------------------------------------------- /docs/articles/carskit_depaul.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/carskit_depaul.jpeg 
-------------------------------------------------------------------------------- /docs/articles/carskit_depaul.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/carskit_depaul.jpg -------------------------------------------------------------------------------- /docs/articles/carskit_depaul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/carskit_depaul.png -------------------------------------------------------------------------------- /docs/articles/cmabs.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/cmabs.jpeg -------------------------------------------------------------------------------- /docs/articles/cmabs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/cmabs.jpg -------------------------------------------------------------------------------- /docs/articles/cmabs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/cmabs.png -------------------------------------------------------------------------------- /docs/articles/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/compare.png -------------------------------------------------------------------------------- /docs/articles/contextual-fig-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/contextual-fig-1.jpg -------------------------------------------------------------------------------- /docs/articles/contextual-fig-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/contextual-fig-1.png -------------------------------------------------------------------------------- /docs/articles/contextual-fig-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/contextual-fig-2.jpg -------------------------------------------------------------------------------- /docs/articles/contextual-fig-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/contextual-fig-2.png -------------------------------------------------------------------------------- /docs/articles/eckles_kaptein_1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eckles_kaptein_1.jpg -------------------------------------------------------------------------------- /docs/articles/eckles_kaptein_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eckles_kaptein_1.png -------------------------------------------------------------------------------- /docs/articles/eg_average_reward.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_average_reward.jpeg -------------------------------------------------------------------------------- /docs/articles/eg_average_reward.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_average_reward.jpg -------------------------------------------------------------------------------- /docs/articles/eg_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_average_reward.png -------------------------------------------------------------------------------- /docs/articles/eg_cumulative_reward.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_cumulative_reward.jpeg -------------------------------------------------------------------------------- /docs/articles/eg_cumulative_reward.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_cumulative_reward.jpg -------------------------------------------------------------------------------- /docs/articles/eg_cumulative_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_cumulative_reward.png -------------------------------------------------------------------------------- /docs/articles/eg_incorrect.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_incorrect.jpeg -------------------------------------------------------------------------------- /docs/articles/eg_incorrect.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_incorrect.jpg -------------------------------------------------------------------------------- /docs/articles/eg_incorrect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_incorrect.png 
-------------------------------------------------------------------------------- /docs/articles/eg_optimal_action.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_optimal_action.jpeg -------------------------------------------------------------------------------- /docs/articles/eg_optimal_action.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_optimal_action.jpg -------------------------------------------------------------------------------- /docs/articles/eg_optimal_action.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/eg_optimal_action.png -------------------------------------------------------------------------------- /docs/articles/linucboffline.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/linucboffline.jpeg -------------------------------------------------------------------------------- /docs/articles/linucboffline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/linucboffline.jpg -------------------------------------------------------------------------------- /docs/articles/linucboffline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/linucboffline.png -------------------------------------------------------------------------------- /docs/articles/mabs.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/mabs.jpeg -------------------------------------------------------------------------------- /docs/articles/mabs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/mabs.jpg -------------------------------------------------------------------------------- /docs/articles/mabs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/mabs.png -------------------------------------------------------------------------------- /docs/articles/ml10m.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ml10m.jpg -------------------------------------------------------------------------------- /docs/articles/ml10m.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ml10m.png -------------------------------------------------------------------------------- /docs/articles/replication-fig-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/replication-fig-1.jpg -------------------------------------------------------------------------------- /docs/articles/replication-fig-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/replication-fig-1.png -------------------------------------------------------------------------------- /docs/articles/replication-fig-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/replication-fig-2.jpg -------------------------------------------------------------------------------- /docs/articles/replication-fig-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/replication-fig-2.png -------------------------------------------------------------------------------- /docs/articles/softmax_average_reward.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_average_reward.jpeg -------------------------------------------------------------------------------- /docs/articles/softmax_average_reward.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_average_reward.jpg -------------------------------------------------------------------------------- /docs/articles/softmax_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_average_reward.png -------------------------------------------------------------------------------- /docs/articles/softmax_cumulative_reward.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_cumulative_reward.jpeg -------------------------------------------------------------------------------- /docs/articles/softmax_cumulative_reward.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_cumulative_reward.jpg -------------------------------------------------------------------------------- /docs/articles/softmax_cumulative_reward.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_cumulative_reward.png -------------------------------------------------------------------------------- /docs/articles/softmax_optimal_action.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_optimal_action.jpeg -------------------------------------------------------------------------------- /docs/articles/softmax_optimal_action.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_optimal_action.jpg -------------------------------------------------------------------------------- /docs/articles/softmax_optimal_action.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/softmax_optimal_action.png -------------------------------------------------------------------------------- /docs/articles/sutton_eg_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_1.jpeg -------------------------------------------------------------------------------- /docs/articles/sutton_eg_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_1.jpg -------------------------------------------------------------------------------- /docs/articles/sutton_eg_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_1.png -------------------------------------------------------------------------------- /docs/articles/sutton_eg_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_2.jpeg -------------------------------------------------------------------------------- /docs/articles/sutton_eg_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_2.jpg -------------------------------------------------------------------------------- /docs/articles/sutton_eg_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_eg_2.png -------------------------------------------------------------------------------- /docs/articles/sutton_gradient.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_gradient.jpeg 
-------------------------------------------------------------------------------- /docs/articles/sutton_gradient.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_gradient.jpg -------------------------------------------------------------------------------- /docs/articles/sutton_gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_gradient.png -------------------------------------------------------------------------------- /docs/articles/sutton_optimistic.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_optimistic.jpeg -------------------------------------------------------------------------------- /docs/articles/sutton_optimistic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_optimistic.jpg -------------------------------------------------------------------------------- /docs/articles/sutton_optimistic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_optimistic.png -------------------------------------------------------------------------------- /docs/articles/sutton_ucb.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_ucb.jpeg -------------------------------------------------------------------------------- /docs/articles/sutton_ucb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_ucb.jpg -------------------------------------------------------------------------------- /docs/articles/sutton_ucb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_ucb.png -------------------------------------------------------------------------------- /docs/articles/sutton_violin.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_violin.jpeg -------------------------------------------------------------------------------- /docs/articles/sutton_violin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_violin.jpg -------------------------------------------------------------------------------- /docs/articles/sutton_violin.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/sutton_violin.png -------------------------------------------------------------------------------- /docs/articles/ucb_average_reward.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_average_reward.jpeg -------------------------------------------------------------------------------- /docs/articles/ucb_average_reward.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_average_reward.jpg -------------------------------------------------------------------------------- /docs/articles/ucb_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_average_reward.png -------------------------------------------------------------------------------- /docs/articles/ucb_cumulative_reward.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_cumulative_reward.jpeg -------------------------------------------------------------------------------- /docs/articles/ucb_cumulative_reward.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_cumulative_reward.jpg -------------------------------------------------------------------------------- /docs/articles/ucb_cumulative_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_cumulative_reward.png -------------------------------------------------------------------------------- /docs/articles/ucb_optimal_action.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_optimal_action.jpeg -------------------------------------------------------------------------------- /docs/articles/ucb_optimal_action.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_optimal_action.jpg -------------------------------------------------------------------------------- /docs/articles/ucb_optimal_action.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/articles/ucb_optimal_action.png -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. 
"?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/docsearch.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_name": "nth_iteration_labs_contextual", 3 | "start_urls": [ 4 | { 5 | "url": "/index.html", 6 | "selectors_key": "homepage", 7 | "tags": [ 8 | "homepage" 9 | ] 10 | }, 11 | { 12 | "url": "/reference", 13 | "selectors_key": "reference", 14 | "tags": [ 15 | "reference" 16 | ] 17 | }, 18 | { 19 | "url": "/articles", 20 | "selectors_key": "articles", 21 | "tags": [ 22 | "articles" 23 | ] 24 | } 25 | ], 26 | "stop_urls": [ 27 | "/reference/$", 28 | "/reference/index.html", 29 | "/articles/$", 30 | "/articles/index.html" 31 | ], 32 | "sitemap_urls": [ 33 | "/sitemap.xml" 34 | ], 35 | "selectors": { 36 | "homepage": { 37 | "lvl0": { 38 | "selector": ".contents h1", 39 | "default_value": "contextual Home page" 40 | }, 41 | "lvl1": { 42 | "selector": ".contents h2" 43 | }, 44 | "lvl2": { 45 | "selector": ".contents h3", 46 | "default_value": "Context" 47 | }, 48 | "lvl3": ".ref-arguments td, .ref-description", 49 | "text": ".contents p, .contents li, .contents .pre" 50 | }, 51 | "reference": { 52 | "lvl0": { 53 | "selector": ".contents h1" 54 | }, 55 | "lvl1": { 56 | "selector": ".contents .name", 57 | "default_value": "Argument" 58 | }, 59 | "lvl2": { 60 | "selector": ".ref-arguments th", 61 | "default_value": "Description" 62 | }, 63 | "lvl3": ".ref-arguments td, .ref-description", 64 | "text": ".contents p, .contents li" 65 | }, 66 | "articles": { 67 | "lvl0": { 
68 | "selector": ".contents h1" 69 | }, 70 | "lvl1": { 71 | "selector": ".contents .name" 72 | }, 73 | "lvl2": { 74 | "selector": ".contents h2, .contents h3", 75 | "default_value": "Context" 76 | }, 77 | "text": ".contents p, .contents li" 78 | } 79 | }, 80 | "selectors_exclude": [ 81 | ".dont-index" 82 | ], 83 | "min_indexed_level": 2, 84 | "custom_settings": { 85 | "separatorsToIndex": "_", 86 | "attributesToRetrieve": [ 87 | "hierarchy", 88 | "content", 89 | "anchor", 90 | "url", 91 | "url_without_anchor" 92 | ] 93 | } 94 | } 95 | 96 | -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/favicon.ico -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | */ 2 | /* 3 | Source: https://github.com/leafo/sticky-kit 4 | License: MIT 5 | */ 6 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 7 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 8 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 10 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 11 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.7.3 2 | pkgdown: 1.5.1 3 | pkgdown_sha: ~ 4 | articles: 5 | cmabs: cmabs.html 6 | cmabsoffline: cmabsoffline.html 7 | eckles_kaptein: eckles_kaptein.html 8 | epsilongreedy: epsilongreedy.html 9 | introduction: introduction.html 10 | mabs: mabs.html 11 | ml10m: ml10m.html 12 | offline_depaul_movies: offline_depaul_movies.html 13 | faq: only_pkgdown/faq.html 14 | replication: replication.html 15 | simpsons: simpsons.html 16 | sutton_barto: sutton_barto.html 17 | website_optimization: website_optimization.html 18 | last_built: 2020-07-25T14:34Z 19 | 20 | -------------------------------------------------------------------------------- /docs/reference/EpsilonFirstPolicy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/EpsilonFirstPolicy-1.png -------------------------------------------------------------------------------- /docs/reference/EpsilonFirstPolicy-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/EpsilonFirstPolicy-2.png -------------------------------------------------------------------------------- /docs/reference/EpsilonGreedyPolicy-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/EpsilonGreedyPolicy-1.png -------------------------------------------------------------------------------- /docs/reference/EpsilonGreedyPolicy-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/EpsilonGreedyPolicy-2.png -------------------------------------------------------------------------------- /docs/reference/Exp3Policy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/Exp3Policy-1.png -------------------------------------------------------------------------------- /docs/reference/Exp3Policy-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/Exp3Policy-2.png -------------------------------------------------------------------------------- /docs/reference/GradientPolicy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/GradientPolicy-1.png -------------------------------------------------------------------------------- /docs/reference/GradientPolicy-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/GradientPolicy-2.png -------------------------------------------------------------------------------- /docs/reference/RandomPolicy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/RandomPolicy-1.png -------------------------------------------------------------------------------- /docs/reference/SoftmaxPolicy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/SoftmaxPolicy-1.png -------------------------------------------------------------------------------- /docs/reference/SoftmaxPolicy-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/SoftmaxPolicy-2.png -------------------------------------------------------------------------------- /docs/reference/ThompsonSamplingPolicy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/ThompsonSamplingPolicy-1.png -------------------------------------------------------------------------------- /docs/reference/UCB1Policy-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/UCB1Policy-1.png -------------------------------------------------------------------------------- /docs/reference/UCB1Policy-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/UCB1Policy-2.png -------------------------------------------------------------------------------- /docs/reference/UCB2Policy-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/UCB2Policy-1.png -------------------------------------------------------------------------------- /docs/reference/UCB2Policy-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/UCB2Policy-2.png -------------------------------------------------------------------------------- /docs/reference/figures/1simulator.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/1simulator.jpeg -------------------------------------------------------------------------------- /docs/reference/figures/2agent.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/2agent.jpeg -------------------------------------------------------------------------------- /docs/reference/figures/3abandit.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/3abandit.jpeg -------------------------------------------------------------------------------- /docs/reference/figures/3bpolicy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/3bpolicy.jpeg -------------------------------------------------------------------------------- /docs/reference/figures/3cbandit.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/3cbandit.jpeg -------------------------------------------------------------------------------- /docs/reference/figures/3dpolicy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/3dpolicy.jpeg -------------------------------------------------------------------------------- /docs/reference/figures/algoepsilonfirst.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/algoepsilonfirst.jpg 
-------------------------------------------------------------------------------- /docs/reference/figures/cmab_all.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/cmab_all.jpeg -------------------------------------------------------------------------------- /docs/reference/figures/cmab_all_large.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/cmab_all_large.jpg -------------------------------------------------------------------------------- /docs/reference/figures/cmab_all_medium.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/figures/cmab_all_medium.jpg -------------------------------------------------------------------------------- /docs/reference/invgamma-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/invgamma-1.png -------------------------------------------------------------------------------- /docs/reference/value_remaining-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/docs/reference/value_remaining-1.png -------------------------------------------------------------------------------- /man/BootstrapTSPolicy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_mab_ts_bootstrap.R 3 | \name{BootstrapTSPolicy} 4 | \alias{BootstrapTSPolicy} 5 | \title{Policy: Thompson sampling with the online bootstrap} 6 | \description{ 7 | Bootstrap Thompson Sampling 8 | } 9 | \details{ 10 | Bootstrap Thompson Sampling (BTS) is a heuristic method 11 | for solving bandit problems which modifies Thompson Sampling 12 | (see \link{ThompsonSamplingPolicy}) by replacing the posterior distribution 13 | used in Thompson sampling by a bootstrap distribution. 14 | } 15 | \section{Usage}{ 16 | 17 | \preformatted{ 18 | policy <- BootstrapTSPolicy(J = 100, a= 1, b = 1) 19 | } 20 | 21 | 22 | \preformatted{ 23 | policy <- BootstrapTSPolicy(1000) 24 | } 25 | } 26 | 27 | \section{Arguments}{ 28 | 29 | 30 | \describe{ 31 | \item{\code{new(J = 100, a= 1, b = 1)}}{ Generates a new \code{BootstrapTSPolicy} object. 32 | Arguments are defined in the Argument section above.} 33 | } 34 | 35 | \describe{ 36 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of 37 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body. 38 | The parameters defined here can later be accessed by arm index in the following way: 39 | \code{theta[[index_of_arm]]$parameter_name} 40 | } 41 | } 42 | 43 | \describe{ 44 | \item{\code{get_action(context)}}{ 45 | here, a policy decides which arm to choose, based on the current values 46 | of its parameters and, potentially, the current context. 
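A minimal sketch of how the BootstrapTSPolicy documented here is typically run, following the Agent/Simulator pattern used in the package's other policy examples (RandomPolicy, UCB1Policy); the J value and the Bernoulli weights below are illustrative, not taken from this repository:

# Minimal sketch: Bootstrap Thompson Sampling on a three-armed Bernoulli bandit.
# Values are illustrative only; the simulation pattern mirrors the package's own examples.
library(contextual)

horizon     <- 100L
simulations <- 100L
weights     <- c(0.9, 0.1, 0.1)              # per-arm success probabilities

policy  <- BootstrapTSPolicy$new(J = 1000)   # J bootstrap replicates per arm
bandit  <- BasicBernoulliBandit$new(weights = weights)
agent   <- Agent$new(policy, bandit)

history <- Simulator$new(agent, horizon, simulations, do_parallel = FALSE)$run()

plot(history, type = "cumulative")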
47 | } 48 | } 49 | 50 | \describe{ 51 | \item{\code{set_reward(reward, context)}}{ 52 | in \code{set_reward(reward, context)}, a policy updates its parameter values 53 | based on the reward received, and, potentially, the current context. 54 | } 55 | } 56 | } 57 | 58 | \references{ 59 | Eckles, D., & Kaptein, M. (2014). Thompson sampling with the online bootstrap. 60 | arXiv preprint arXiv:1410.4009. 61 | 62 | Thompson, W. R. (1933). On the likelihood that one unknown probability exceeds another in 63 | view of the evidence of two samples. Biometrika, 25(3/4), 285-294. 64 | } 65 | \seealso{ 66 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 67 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 68 | 69 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 70 | \code{\link{OfflineReplayEvaluatorBandit}} 71 | 72 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 73 | } 74 | -------------------------------------------------------------------------------- /man/ContextualEpochGreedyPolicy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_cmab_lin_epoch_greedy.R 3 | \name{ContextualEpochGreedyPolicy} 4 | \alias{ContextualEpochGreedyPolicy} 5 | \title{Policy: A Time and Space Efficient Algorithm for Contextual Linear Bandits} 6 | \description{ 7 | Policy: A Time and Space Efficient Algorithm for Contextual Linear Bandits 8 | } 9 | \section{Usage}{ 10 | 11 | \preformatted{ 12 | policy <- ContextualEpochGreedyPolicy$new(sZl = 10) 13 | } 14 | } 15 | 16 | \seealso{ 17 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 18 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 19 | 20 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 21 | \code{\link{OfflineReplayEvaluatorBandit}} 22 | 23 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 24 | } 25 | -------------------------------------------------------------------------------- /man/ContextualEpsilonGreedyPolicy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_cmab_lin_epsilon_greedy.R 3 | \name{ContextualEpsilonGreedyPolicy} 4 | \alias{ContextualEpsilonGreedyPolicy} 5 | \title{Policy: ContextualEpsilonGreedyPolicy with unique linear models} 6 | \description{ 7 | Policy: ContextualEpsilonGreedyPolicy with unique linear models 8 | } 9 | \section{Usage}{ 10 | 11 | \preformatted{ 12 | policy <- ContextualEpsilonGreedyPolicy(epsilon = 0.1) 13 | } 14 | } 15 | 16 | \section{Arguments}{ 17 | 18 | 19 | \describe{ 20 | \item{\code{epsilon}}{ 21 | double, a positive real value R+ 22 | } 23 | } 24 | } 25 | 26 | \section{Parameters}{ 27 | 28 | 29 | \describe{ 30 | \item{\code{A}}{ 31 | d*d identity matrix 32 | } 33 | \item{\code{b}}{ 34 | a zero vector of length d 35 | } 36 | } 37 | } 38 | 39 | \section{Methods}{ 40 | 41 | 42 | \describe{ 43 | \item{\code{new(epsilon = 0.1)}}{ Generates a new \code{ContextualEpsilonGreedyPolicy} object. 
44 | Arguments are defined in the Argument section above.} 45 | } 46 | 47 | \describe{ 48 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of 49 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body. 50 | The parameters defined here can later be accessed by arm index in the following way: 51 | \code{theta[[index_of_arm]]$parameter_name} 52 | } 53 | } 54 | 55 | \describe{ 56 | \item{\code{get_action(context)}}{ 57 | here, a policy decides which arm to choose, based on the current values 58 | of its parameters and, potentially, the current context. 59 | } 60 | } 61 | 62 | \describe{ 63 | \item{\code{set_reward(reward, context)}}{ 64 | in \code{set_reward(reward, context)}, a policy updates its parameter values 65 | based on the reward received, and, potentially, the current context. 66 | } 67 | } 68 | } 69 | 70 | \seealso{ 71 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 72 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 73 | 74 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 75 | \code{\link{OfflineReplayEvaluatorBandit}} 76 | 77 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 78 | } 79 | -------------------------------------------------------------------------------- /man/ContextualLogitBTSPolicy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_cmab_logit_ts_bootstrap.R 3 | \name{ContextualLogitBTSPolicy} 4 | \alias{ContextualLogitBTSPolicy} 5 | \title{Policy: ContextualLogitBTSPolicy} 6 | \description{ 7 | Policy: ContextualLogitBTSPolicy 8 | } 9 | \section{Usage}{ 10 | 11 | \preformatted{ 12 | policy <- ContextualLogitBTSPolicy() 13 | } 14 | } 15 | 16 | \seealso{ 17 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 18 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 19 | 20 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 21 | \code{\link{OfflineReplayEvaluatorBandit}} 22 | 23 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 24 | } 25 | -------------------------------------------------------------------------------- /man/ContextualTSProbitPolicy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_cmab_probit_ts.R 3 | \name{ContextualTSProbitPolicy} 4 | \alias{ContextualTSProbitPolicy} 5 | \title{Policy: ContextualTSProbitPolicy} 6 | \description{ 7 | Makes use of BOPR, ergo only use binary independent variables.
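The A and b parameters documented above for ContextualEpsilonGreedyPolicy (a d*d identity matrix and a zero vector of length d) follow the standard per-arm linear-model bookkeeping. The fragment below is an illustrative plain-R sketch of that bookkeeping, not the package's implementation; the dimension, context, and reward values are placeholders:

# Illustration only: ridge-regression style per-arm updates that policies
# such as ContextualEpsilonGreedyPolicy typically keep track of.
d <- 3                                   # number of context features (illustrative)
A <- diag(d)                             # d*d identity matrix
b <- rep(0, d)                           # zero vector of length d

x <- c(1, 0, 1)                          # observed context for the chosen arm
r <- 1                                   # observed reward

A <- A + x %*% t(x)                      # accumulate outer products of contexts
b <- b + r * x                           # accumulate reward-weighted contexts

theta_hat <- solve(A, b)                 # per-arm coefficient estimate
expected_reward <- sum(x * theta_hat)    # greedy score; explored with probability epsilon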
8 | } 9 | \section{Usage}{ 10 | 11 | \preformatted{ 12 | policy <- ContextualTSProbitPolicy() 13 | } 14 | } 15 | 16 | \seealso{ 17 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 18 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 19 | 20 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 21 | \code{\link{OfflineReplayEvaluatorBandit}} 22 | 23 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 24 | } 25 | -------------------------------------------------------------------------------- /man/FixedPolicy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_fixed.R 3 | \name{FixedPolicy} 4 | \alias{FixedPolicy} 5 | \title{Policy: Fixed Arm} 6 | \description{ 7 | \code{FixedPolicy} implements a "naive" policy which always chooses a prespecified arm. 8 | } 9 | \section{Usage}{ 10 | 11 | \preformatted{ 12 | policy <- FixedPolicy(fixed_arm = 1) 13 | } 14 | } 15 | 16 | \section{Arguments}{ 17 | 18 | 19 | \describe{ 20 | \item{\code{fixed_arm}}{ 21 | numeric; index of the arm that will be chosen for each time step. 22 | } 23 | } 24 | } 25 | 26 | \section{Methods}{ 27 | 28 | 29 | \describe{ 30 | \item{\code{new()}}{ Generates a new \code{FixedPolicy} object. Arguments are defined in the Argument 31 | section above.} 32 | } 33 | 34 | \describe{ 35 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of 36 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body. 37 | The parameters defined here can later be accessed by arm index in the following way: 38 | \code{theta[[index_of_arm]]$parameter_name} 39 | } 40 | } 41 | 42 | \describe{ 43 | \item{\code{get_action(context)}}{ 44 | here, a policy decides which arm to choose, based on the current values 45 | of its parameters and, potentially, the current context. 46 | } 47 | } 48 | 49 | \describe{ 50 | \item{\code{set_reward(reward, context)}}{ 51 | in \code{set_reward(reward, context)}, a policy updates its parameter values 52 | based on the reward received, and, potentially, the current context. 53 | } 54 | } 55 | } 56 | 57 | \seealso{ 58 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 59 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 60 | 61 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 62 | \code{\link{OfflineReplayEvaluatorBandit}} 63 | 64 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 65 | } 66 | -------------------------------------------------------------------------------- /man/OraclePolicy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_oracle.R 3 | \name{OraclePolicy} 4 | \alias{OraclePolicy} 5 | \title{Policy: Oracle} 6 | \description{ 7 | \code{OraclePolicy} is also known as a "cheating" or "godlike" 8 | policy, as it knows the reward probabilities at all times, 9 | and will always play the optimal arm. It is often used as 10 | a baseline to compare other policies to.
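Since OraclePolicy serves as a baseline, a comparison run is its natural usage. The sketch below assumes, as in the package's policy-comparison demos, that Simulator accepts a list of Agents sharing one bandit; the epsilon, weights, and horizon values are illustrative:

# Minimal sketch (illustrative values): comparing EpsilonGreedyPolicy
# against the OraclePolicy baseline on the same Bernoulli bandit.
library(contextual)

horizon     <- 100L
simulations <- 100L
weights     <- c(0.9, 0.1, 0.1)

bandit <- BasicBernoulliBandit$new(weights = weights)

agents <- list(Agent$new(EpsilonGreedyPolicy$new(epsilon = 0.1), bandit),
               Agent$new(OraclePolicy$new(), bandit))

history <- Simulator$new(agents, horizon, simulations, do_parallel = FALSE)$run()

plot(history, type = "cumulative")       # the oracle traces the attainable upper bound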
11 | } 12 | \section{Usage}{ 13 | 14 | \preformatted{ 15 | policy <- OraclePolicy() 16 | } 17 | } 18 | 19 | \section{Arguments}{ 20 | 21 | 22 | \describe{ 23 | \item{\code{name}}{ 24 | character string specifying this policy. \code{name} 25 | is, among others, saved to the History log and displayed in summaries and plots. 26 | } 27 | } 28 | } 29 | 30 | \section{Methods}{ 31 | 32 | 33 | \describe{ 34 | \item{\code{new()}}{ Generates a new \code{OraclePolicy} object. Arguments are defined in the Argument 35 | section above.} 36 | } 37 | 38 | \describe{ 39 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of 40 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body. 41 | The parameters defined here can later be accessed by arm index in the following way: 42 | \code{theta[[index_of_arm]]$parameter_name} 43 | } 44 | } 45 | 46 | \describe{ 47 | \item{\code{get_action(context)}}{ 48 | here, a policy decides which arm to choose, based on the current values 49 | of its parameters and, potentially, the current context. 50 | } 51 | } 52 | 53 | \describe{ 54 | \item{\code{set_reward(reward, context)}}{ 55 | in \code{set_reward(reward, context)}, a policy updates its parameter values 56 | based on the reward received, and, potentially, the current context. 57 | } 58 | } 59 | } 60 | 61 | \references{ 62 | Gittins, J., Glazebrook, K., & Weber, R. (2011). Multi-armed bandit allocation indices. John Wiley & Sons. 63 | (Original work published 1989) 64 | } 65 | \seealso{ 66 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 67 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 68 | 69 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 70 | \code{\link{OfflineReplayEvaluatorBandit}} 71 | 72 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 73 | } 74 | -------------------------------------------------------------------------------- /man/RandomPolicy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_random.R 3 | \name{RandomPolicy} 4 | \alias{RandomPolicy} 5 | \title{Policy: Random} 6 | \description{ 7 | \code{RandomPolicy} always explores, choosing arms uniformly at random. 8 | In that respect, \code{RandomPolicy} is the mirror image of a pure greedy policy, 9 | which would always seek to exploit. 10 | } 11 | \section{Usage}{ 12 | 13 | \preformatted{ 14 | policy <- RandomPolicy(name = "RandomPolicy") 15 | } 16 | } 17 | 18 | \section{Arguments}{ 19 | 20 | 21 | \describe{ 22 | \item{\code{name}}{ 23 | character string specifying this policy. \code{name} 24 | is, among others, saved to the History log and displayed in summaries and plots. 25 | } 26 | } 27 | } 28 | 29 | \section{Methods}{ 30 | 31 | 32 | \describe{ 33 | \item{\code{new()}}{ Generates a new \code{RandomPolicy} object. Arguments are defined in the Argument 34 | section above.} 35 | } 36 | 37 | \describe{ 38 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of 39 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body. 
40 | The parameters defined here can later be accessed by arm index in the following way: 41 | \code{theta[[index_of_arm]]$parameter_name} 42 | } 43 | } 44 | 45 | \describe{ 46 | \item{\code{get_action(context)}}{ 47 | here, a policy decides which arm to choose, based on the current values 48 | of its parameters and, potentially, the current context. 49 | } 50 | } 51 | 52 | \describe{ 53 | \item{\code{set_reward(reward, context)}}{ 54 | in \code{set_reward(reward, context)}, a policy updates its parameter values 55 | based on the reward received, and, potentially, the current context. 56 | } 57 | } 58 | } 59 | 60 | \examples{ 61 | 62 | horizon <- 100L 63 | simulations <- 100L 64 | weights <- c(0.9, 0.1, 0.1) 65 | 66 | policy <- RandomPolicy$new() 67 | bandit <- BasicBernoulliBandit$new(weights = weights) 68 | agent <- Agent$new(policy, bandit) 69 | 70 | history <- Simulator$new(agent, horizon, simulations, do_parallel = FALSE)$run() 71 | 72 | plot(history, type = "arms") 73 | } 74 | \references{ 75 | Gittins, J., Glazebrook, K., & Weber, R. (2011). Multi-armed bandit allocation indices. John Wiley & Sons. 76 | (Original work published 1989) 77 | } 78 | \seealso{ 79 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 80 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 81 | 82 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 83 | \code{\link{OfflineReplayEvaluatorBandit}} 84 | 85 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 86 | } 87 | -------------------------------------------------------------------------------- /man/UCB1Policy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/policy_mab_ucb1.R 3 | \name{UCB1Policy} 4 | \alias{UCB1Policy} 5 | \title{Policy: UCB1} 6 | \description{ 7 | UCB policy for bounded bandits with a Chernoff-Hoeffding Bound 8 | } 9 | \details{ 10 | \code{UCB1Policy} constructs an optimistic estimate in the form of an Upper Confidence Bound to 11 | create an estimate of the expected payoff of each action, and picks the action with the highest estimate. 12 | If the guess is wrong, the optimistic guess quickly decreases, till another action has 13 | the higher estimate. 14 | } 15 | \section{Usage}{ 16 | 17 | \preformatted{ 18 | policy <- UCB1Policy() 19 | } 20 | } 21 | 22 | \section{Methods}{ 23 | 24 | 25 | \describe{ 26 | \item{\code{new()}}{ Generates a new \code{UCB1Policy} object.} 27 | } 28 | 29 | \describe{ 30 | \item{\code{set_parameters()}}{each policy needs to assign the parameters it wants to keep track of 31 | to list \code{self$theta_to_arms} that has to be defined in \code{set_parameters()}'s body. 32 | The parameters defined here can later be accessed by arm index in the following way: 33 | \code{theta[[index_of_arm]]$parameter_name} 34 | } 35 | } 36 | 37 | \describe{ 38 | \item{\code{get_action(context)}}{ 39 | here, a policy decides which arm to choose, based on the current values 40 | of its parameters and, potentially, the current context. 41 | } 42 | } 43 | 44 | \describe{ 45 | \item{\code{set_reward(reward, context)}}{ 46 | in \code{set_reward(reward, context)}, a policy updates its parameter values 47 | based on the reward received, and, potentially, the current context. 
48 | } 49 | } 50 | } 51 | 52 | \examples{ 53 | \dontrun{ 54 | 55 | horizon <- 100L 56 | simulations <- 100L 57 | weights <- c(0.9, 0.1, 0.1) 58 | 59 | policy <- UCB1Policy$new() 60 | bandit <- BasicBernoulliBandit$new(weights = weights) 61 | agent <- Agent$new(policy, bandit) 62 | 63 | history <- Simulator$new(agent, horizon, simulations, do_parallel = FALSE)$run() 64 | 65 | plot(history, type = "cumulative") 66 | 67 | plot(history, type = "arms") 68 | 69 | } 70 | } 71 | \references{ 72 | Lai, T. L., & Robbins, H. (1985). Asymptotically efficient adaptive allocation rules. Advances in applied 73 | mathematics, 6(1), 4-22. 74 | } 75 | \seealso{ 76 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 77 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 78 | 79 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 80 | \code{\link{OfflineReplayEvaluatorBandit}} 81 | 82 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 83 | } 84 | -------------------------------------------------------------------------------- /man/clipr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{clipr} 4 | \alias{clipr} 5 | \title{Clip vectors} 6 | \usage{ 7 | clipr(x, min, max) 8 | } 9 | \arguments{ 10 | \item{x}{vector to be clipped} 11 | 12 | \item{min}{numeric. lowest value} 13 | 14 | \item{max}{numeric. highest value} 15 | } 16 | \description{ 17 | Clips values to a minimum and maximum value. That is, all values below the lower clamp value 18 | become the lower value, and all values above the upper clamp value become the upper value. 19 | } 20 | -------------------------------------------------------------------------------- /man/data_table_factors_to_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{data_table_factors_to_numeric} 4 | \alias{data_table_factors_to_numeric} 5 | \title{Convert all factor columns in data.table to numeric} 6 | \usage{ 7 | data_table_factors_to_numeric(dt) 8 | } 9 | \arguments{ 10 | \item{dt}{a data.table} 11 | } 12 | \value{ 13 | the data.table with column factors converted to numeric 14 | } 15 | \description{ 16 | Convert all factor columns in data.table to numeric 17 | } 18 | -------------------------------------------------------------------------------- /man/dec-set.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{dec<-} 4 | \alias{dec<-} 5 | \title{Decrement} 6 | \usage{ 7 | dec(x) <- value 8 | } 9 | \arguments{ 10 | \item{x}{object to be decremented} 11 | 12 | \item{value}{value by which x will be modified} 13 | } 14 | \description{ 15 | \code{dec<-} decrements \code{x} by value.
Equivalent to \code{x <- x - value.} 16 | } 17 | \examples{ 18 | x <- 6:10 19 | dec(x) <- 5 20 | x 21 | 22 | } 23 | -------------------------------------------------------------------------------- /man/figures/1simulator.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/1simulator.jpeg -------------------------------------------------------------------------------- /man/figures/2agent.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/2agent.jpeg -------------------------------------------------------------------------------- /man/figures/3abandit.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/3abandit.jpeg -------------------------------------------------------------------------------- /man/figures/3bpolicy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/3bpolicy.jpeg -------------------------------------------------------------------------------- /man/figures/3cbandit.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/3cbandit.jpeg -------------------------------------------------------------------------------- /man/figures/3dpolicy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/3dpolicy.jpeg -------------------------------------------------------------------------------- /man/figures/algoepsilonfirst.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/algoepsilonfirst.jpg -------------------------------------------------------------------------------- /man/figures/cmab_all.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/cmab_all.jpeg -------------------------------------------------------------------------------- /man/figures/cmab_all_large.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/cmab_all_large.jpg -------------------------------------------------------------------------------- /man/figures/cmab_all_medium.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/man/figures/cmab_all_medium.jpg -------------------------------------------------------------------------------- /man/formatted_difftime.Rd: -------------------------------------------------------------------------------- 1 
| % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{formatted_difftime} 4 | \alias{formatted_difftime} 5 | \title{Format difftime objects} 6 | \usage{ 7 | formatted_difftime(x) 8 | } 9 | \arguments{ 10 | \item{x}{difftime object} 11 | } 12 | \value{ 13 | string "days, h:mm:ss.ms" 14 | } 15 | \description{ 16 | Format difftime objects 17 | } 18 | -------------------------------------------------------------------------------- /man/get_arm_context.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{get_arm_context} 4 | \alias{get_arm_context} 5 | \title{Return context vector of an arm} 6 | \usage{ 7 | get_arm_context( 8 | context, 9 | arm, 10 | select_features = NULL, 11 | prepend_arm_vector = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{context}{a context list containing a d x k Matrix or 16 | d dimensional context vector X, the number of features d and 17 | number of arms k.} 18 | 19 | \item{arm}{index of arm.} 20 | 21 | \item{select_features}{indices of the features to be returned.} 22 | 23 | \item{prepend_arm_vector}{prepend a one-hot-encoded arm vector to the returned context vector. That is, 24 | when k = 5 arms and the arm to be returned is arm 3, prepend c(0,0,1,0,0)} 25 | } 26 | \value{ 27 | Vector that represents context related to an arm 28 | } 29 | \description{ 30 | Given a d x k matrix or a d dimensional vector X, 31 | returns a vector with the arm's context. 32 | } 33 | -------------------------------------------------------------------------------- /man/get_full_context.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{get_full_context} 4 | \alias{get_full_context} 5 | \title{Get full context matrix over all arms} 6 | \usage{ 7 | get_full_context(context, select_features = NULL, prepend_arm_matrix = FALSE) 8 | } 9 | \arguments{ 10 | \item{context}{a context list containing a d x k Matrix or 11 | d dimensional context vector X, the number of features d and 12 | number of arms k.} 13 | 14 | \item{select_features}{indices of the feature rows to be returned.} 15 | 16 | \item{prepend_arm_matrix}{prepend a diagonal arm matrix to the returned context vector.
That is, 17 | when k = 5 arms, prepend diag(5) to the top of the matrix.} 18 | } 19 | \value{ 20 | A d x k context Matrix 21 | } 22 | \description{ 23 | Given matrix or d dimensional vector X, 24 | number of arms k and number of features d 25 | returns a matrix with d x k context matrix 26 | } 27 | -------------------------------------------------------------------------------- /man/get_global_seed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{get_global_seed} 4 | \alias{get_global_seed} 5 | \title{Lookup .Random.seed in global environment} 6 | \usage{ 7 | get_global_seed() 8 | } 9 | \value{ 10 | an integer vector, containing the random number generator (RNG) state for random number generation 11 | } 12 | \description{ 13 | Lookup .Random.seed in global environment 14 | } 15 | -------------------------------------------------------------------------------- /man/inc-set.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{inc<-} 4 | \alias{inc<-} 5 | \title{Increment} 6 | \usage{ 7 | inc(x) <- value 8 | } 9 | \arguments{ 10 | \item{x}{object to be incremented} 11 | 12 | \item{value}{value by which x will be modified} 13 | } 14 | \description{ 15 | \code{inc<-} increments \code{x} by value. Equivalent to \code{x <- x + value.} 16 | } 17 | \examples{ 18 | x <- 1:5 19 | inc(x) <- 5 20 | x 21 | 22 | } 23 | -------------------------------------------------------------------------------- /man/ind.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{ind} 4 | \alias{ind} 5 | \title{On-the-fly indicator function for use in formulae} 6 | \usage{ 7 | ind(cond) 8 | } 9 | \arguments{ 10 | \item{cond}{a logical condition to be evaluated} 11 | } 12 | \value{ 13 | a binary (0/1) coded variable indicating whether the condition is true 14 | } 15 | \description{ 16 | On-the-fly indicator function for use in formulae 17 | } 18 | -------------------------------------------------------------------------------- /man/inv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{inv} 4 | \alias{inv} 5 | \title{Inverse from Choleski (or QR) Decomposition.} 6 | \usage{ 7 | inv(M) 8 | } 9 | \arguments{ 10 | \item{M}{matrix} 11 | } 12 | \description{ 13 | Invert a symmetric, positive definite square matrix from its Choleski decomposition. 
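For such a matrix the result should agree with base R's \code{solve()} up to numerical error; a quick check, reusing the matrix from the example below (and assuming only that \code{inv()} returns a plain numeric matrix):

\preformatted{
M <- cbind(1, 1:3, c(1, 3, 7))   # symmetric, positive definite
all.equal(inv(M), solve(M))      # expected TRUE, up to numerical tolerance
}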
14 | } 15 | \examples{ 16 | inv(cbind(1, 1:3, c(1,3,7))) 17 | 18 | } 19 | -------------------------------------------------------------------------------- /man/invgamma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{invgamma} 4 | \alias{invgamma} 5 | \alias{dinvgamma} 6 | \alias{pinvgamma} 7 | \alias{qinvgamma} 8 | \alias{rinvgamma} 9 | \title{The Inverse Gamma Distribution} 10 | \usage{ 11 | dinvgamma(x, shape, rate = 1, scale = 1/rate, log = FALSE) 12 | 13 | pinvgamma(q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, log.p = FALSE) 14 | 15 | qinvgamma(p, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, log.p = FALSE) 16 | 17 | rinvgamma(n, shape, rate = 1, scale = 1/rate) 18 | } 19 | \arguments{ 20 | \item{x, q}{vector of quantiles.} 21 | 22 | \item{shape}{inverse gamma shape parameter} 23 | 24 | \item{rate}{inverse gamma rate parameter} 25 | 26 | \item{scale}{alternative to rate; scale = 1/rate} 27 | 28 | \item{log, log.p}{logical; if TRUE, probabilities p are given as 29 | log(p).} 30 | 31 | \item{lower.tail}{logical; if TRUE (default), probabilities are P(X <= x) otherwise, P(X > x).} 32 | 33 | \item{p}{vector of probabilities.} 34 | 35 | \item{n}{number of observations. If length(n) > 1, the length is 36 | taken to be the number required.} 37 | } 38 | \description{ 39 | Density, distribution function, quantile function and random 40 | generation for the inverse gamma distribution. 41 | } 42 | \details{ 43 | The inverse gamma distribution with parameters shape and rate has 44 | density \emph{f(x) = rate^shape/Gamma(shape) x^(-1-shape) 45 | e^(-rate/x)} it is the inverse of the standard gamma 46 | parameterization in R. 47 | 48 | The functions (d/p/q/r)invgamma simply wrap those of the standard 49 | (d/p/q/r)gamma R implementation, so look at, say, 50 | \code{\link{dgamma}} for details. 51 | } 52 | \examples{ 53 | 54 | s <- seq(0, 5, .01) 55 | plot(s, dinvgamma(s, 7, 10), type = 'l') 56 | 57 | f <- function(x) dinvgamma(x, 7, 10) 58 | q <- 2 59 | integrate(f, 0, q) 60 | (p <- pinvgamma(q, 7, 10)) 61 | qinvgamma(p, 7, 10) # = q 62 | mean(rinvgamma(1e5, 7, 10) <= q) 63 | } 64 | -------------------------------------------------------------------------------- /man/invlogit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{invlogit} 4 | \alias{invlogit} 5 | \title{Inverse Logit Function} 6 | \usage{ 7 | invlogit(x) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric object.} 11 | } 12 | \value{ 13 | An object of the same type as x containing the inverse logits of the input values. 14 | } 15 | \description{ 16 | Given a numeric object return the inverse logit of the values. 17 | } 18 | -------------------------------------------------------------------------------- /man/is_rstudio.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{is_rstudio} 4 | \alias{is_rstudio} 5 | \title{Check if in RStudio} 6 | \usage{ 7 | is_rstudio() 8 | } 9 | \value{ 10 | A \code{logical} value that indicates whether R is open in RStudio. 11 | } 12 | \description{ 13 | Detects whether R is open in RStudio. 
14 | } 15 | \examples{ 16 | is_rstudio() 17 | 18 | } 19 | -------------------------------------------------------------------------------- /man/mvrnorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{mvrnorm} 4 | \alias{mvrnorm} 5 | \title{Simulate from a Multivariate Normal Distribution} 6 | \usage{ 7 | mvrnorm(n, mu, sigma) 8 | } 9 | \arguments{ 10 | \item{n}{the number of samples required.} 11 | 12 | \item{mu}{a vector giving the means of the variables.} 13 | 14 | \item{sigma}{a positive-definite symmetric matrix specifying the covariance matrix of the variables.} 15 | } 16 | \value{ 17 | If \code{n = 1} a vector of the same length as \code{mu}, otherwise an \code{n} by 18 | \code{length(mu)} matrix with one sample in each row. 19 | } 20 | \description{ 21 | Produces one or more samples from the specified 22 | multivariate normal distribution. 23 | } 24 | -------------------------------------------------------------------------------- /man/one_hot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{one_hot} 4 | \alias{one_hot} 5 | \title{One Hot Encoding of data.table columns} 6 | \usage{ 7 | one_hot( 8 | dt, 9 | cols = "auto", 10 | sparsifyNAs = FALSE, 11 | naCols = FALSE, 12 | dropCols = TRUE, 13 | dropUnusedLevels = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{dt}{A data.table} 18 | 19 | \item{cols}{Which column(s) should be one-hot-encoded? DEFAULT = "auto" encodes all unordered 20 | factor columns.} 21 | 22 | \item{sparsifyNAs}{Should NAs be converted to 0s?} 23 | 24 | \item{naCols}{Should columns be generated to indicate the presence of NAs? Will only apply to factor 25 | columns with at least one NA} 26 | 27 | \item{dropCols}{Should the resulting data.table exclude the original columns which are one-hot-encoded?} 28 | 29 | \item{dropUnusedLevels}{Should columns of all 0s be generated for unused factor levels?} 30 | } 31 | \description{ 32 | One-Hot-Encode unordered factor columns of a data.table. From ben519's "mltools" package. 33 | } 34 | \details{ 35 | One-hot-encoding converts an unordered categorical vector (i.e. a factor) to multiple binarized vectors 36 | where each binary vector of 37 | 1s and 0s indicates the presence of a class (i.e. level) of the original vector.
38 | } 39 | \examples{ 40 | library(data.table) 41 | 42 | dt <- data.table( 43 | ID = 1:4, 44 | color = factor(c("red", NA, "blue", "blue"), levels=c("blue", "green", "red")) 45 | ) 46 | 47 | one_hot(dt) 48 | one_hot(dt, sparsifyNAs=TRUE) 49 | one_hot(dt, naCols=TRUE) 50 | one_hot(dt, dropCols=FALSE) 51 | one_hot(dt, dropUnusedLevels=TRUE) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /man/ones_in_zeroes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{ones_in_zeroes} 4 | \alias{ones_in_zeroes} 5 | \title{A vector of zeroes and ones} 6 | \usage{ 7 | ones_in_zeroes(vector_length, index_of_one) 8 | } 9 | \arguments{ 10 | \item{vector_length}{How long will the vector be?} 11 | 12 | \item{index_of_one}{Where to insert the one?} 13 | } 14 | \value{ 15 | Vector of zeroes with one(s) at given index position(s) 16 | } 17 | \description{ 18 | A vector of zeroes and ones 19 | } 20 | -------------------------------------------------------------------------------- /man/plot.history.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_generic.R 3 | \name{plot.history} 4 | \alias{plot.history} 5 | \alias{plot.History} 6 | \title{Plot Method for Contextual History} 7 | \usage{ 8 | \method{plot}{History}(x, ...) 9 | } 10 | \arguments{ 11 | \item{x}{A \code{History} object.} 12 | 13 | \item{...}{Further plotting parameters.} 14 | } 15 | \description{ 16 | plot.history, a method for the plot generic. It is designed for a quick look at History data. 17 | } 18 | \seealso{ 19 | Core contextual classes: \code{\link{Simulator}}, 20 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 21 | 22 | Bandit classes: \code{\link{Bandit}}, \code{\link{BasicBernoulliBandit}}, 23 | \code{\link{OfflineReplayEvaluatorBandit}}, \code{\link{ContextualLogitBandit}} 24 | } 25 | -------------------------------------------------------------------------------- /man/print.history.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_generic.R 3 | \name{print.history} 4 | \alias{print.history} 5 | \alias{print.History} 6 | \title{Print Method for Contextual History} 7 | \usage{ 8 | \method{print}{History}(x, ...) 9 | } 10 | \arguments{ 11 | \item{x}{A \code{History} object.} 12 | 13 | \item{...}{Further plotting parameters.} 14 | } 15 | \description{ 16 | print.history, a method for the print generic. It is designed for a quick look at History data. 
17 | } 18 | \seealso{ 19 | Core contextual classes: \code{\link{Simulator}}, 20 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 21 | 22 | Bandit classes: \code{\link{Bandit}}, \code{\link{BasicBernoulliBandit}}, 23 | \code{\link{OfflineReplayEvaluatorBandit}}, \code{\link{ContextualLogitBandit}} 24 | } 25 | -------------------------------------------------------------------------------- /man/prob_winner.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{prob_winner} 4 | \alias{prob_winner} 5 | \title{Binomial Win Probability} 6 | \usage{ 7 | prob_winner(post) 8 | } 9 | \arguments{ 10 | \item{post}{Simulated results from the posterior, as provided by sim_post()} 11 | } 12 | \value{ 13 | Probabilities each arm is the winner. 14 | } 15 | \description{ 16 | Function to compute probability that each arm is the winner, 17 | given simulated posterior results. 18 | } 19 | \examples{ 20 | 21 | x <- c(10,20,30,50) 22 | n <- c(100,102,120,130) 23 | betaPost <- sim_post(x,n) 24 | pw <- prob_winner(betaPost) 25 | 26 | } 27 | \author{ 28 | Thomas Lotze and Markus Loecher 29 | } 30 | -------------------------------------------------------------------------------- /man/sample_one_of.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{sample_one_of} 4 | \alias{sample_one_of} 5 | \title{Sample one element from vector or list} 6 | \usage{ 7 | sample_one_of(x) 8 | } 9 | \arguments{ 10 | \item{x}{A vector of one or more elements from which to choose} 11 | } 12 | \value{ 13 | One value, drawn from x. 14 | } 15 | \description{ 16 | Takes one sample from a vector or list. Does not throw an error for zero length lists. 17 | } 18 | -------------------------------------------------------------------------------- /man/set_external.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{set_external} 4 | \alias{set_external} 5 | \title{Change Default Graphing Device from RStudio} 6 | \usage{ 7 | set_external(ext = TRUE, width = 10, height = 6) 8 | } 9 | \arguments{ 10 | \item{ext}{A \code{logical} indicating whether to plot in a popup or within the RStudio UI.} 11 | 12 | \item{width}{Width in pixels of the popup window} 13 | 14 | \item{height}{Height in pixels of the popup window} 15 | } 16 | \description{ 17 | Checks to see if the user is in RStudio. If so, then it changes the device to a popup window. 18 | } 19 | \details{ 20 | Depending on the operating system, the default drivers attempted to be used are: 21 | 22 | OS X: quartz() 23 | 24 | Linux: x11() 25 | 26 | Windows: windows() 27 | 28 | Note, this setting is not permanent. Thus, the behavioral change will last 29 | until the end of the session. 30 | 31 | Also, the active graphing environment will be killed. 32 | As a result, any graphs that are open will be deleted. 
33 | } 34 | \examples{ 35 | \dontrun{ 36 | 37 | # Turn on external graphs 38 | set_external() 39 | 40 | # Turn off external graphs 41 | set_external(FALSE) 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /man/set_global_seed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{set_global_seed} 4 | \alias{set_global_seed} 5 | \title{Set .Random.seed to a pre-saved value} 6 | \usage{ 7 | set_global_seed(x) 8 | } 9 | \arguments{ 10 | \item{x}{integer vector} 11 | } 12 | \description{ 13 | Set .Random.seed to a pre-saved value 14 | } 15 | -------------------------------------------------------------------------------- /man/sherman_morrisson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{sherman_morrisson} 4 | \alias{sherman_morrisson} 5 | \title{Sherman-Morrison inverse} 6 | \usage{ 7 | sherman_morrisson(inv, x) 8 | } 9 | \arguments{ 10 | \item{inv}{inverse matrix to be updated} 11 | 12 | \item{x}{column vector to update inv with} 13 | } 14 | \description{ 15 | Sherman-Morrison inverse: updates an existing inverse matrix for a rank-one change defined by column vector \code{x}. 16 | } 17 | -------------------------------------------------------------------------------- /man/sim_post.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{sim_post} 4 | \alias{sim_post} 5 | \title{Binomial Posterior Simulator} 6 | \usage{ 7 | sim_post(x, n, alpha = 1, beta = 1, ndraws = 5000) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of the number of successes per arm.} 11 | 12 | \item{n}{Vector of the number of trials per arm.} 13 | 14 | \item{alpha}{Shape parameter alpha for the prior beta distribution.} 15 | 16 | \item{beta}{Shape parameter beta for the prior beta distribution.} 17 | 18 | \item{ndraws}{Number of random draws from the posterior.} 19 | } 20 | \value{ 21 | Matrix of Bayesian probabilities for each arm being the best binomial bandit 22 | } 23 | \description{ 24 | Simulates the posterior distribution of 25 | the Bayesian probabilities for each arm being the 26 | best binomial bandit. 27 | } 28 | \examples{ 29 | 30 | x <- c(10,20,30,50) 31 | n <- c(100,102,120,130) 32 | sp <- sim_post(x,n) 33 | 34 | } 35 | \author{ 36 | Thomas Lotze and Markus Loecher 37 | } 38 | -------------------------------------------------------------------------------- /man/sum_of.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{sum_of} 4 | \alias{sum_of} 5 | \title{Sum of list} 6 | \usage{ 7 | sum_of(x) 8 | } 9 | \arguments{ 10 | \item{x}{List} 11 | } 12 | \description{ 13 | Returns the sum of the values of the elements of a list \code{x}. 14 | } 15 | \details{ 16 | \code{sum_of} is typically used to aggregate the per-arm parameter values kept in a policy's 17 | \code{theta} list, 18 | as illustrated in the example below.
19 | } 20 | \examples{ 21 | 22 | theta = list(par_one = list(1,2,3), par_two = list(2,3,4)) 23 | sum_of(theta$par_one) 24 | 25 | } 26 | -------------------------------------------------------------------------------- /man/summary.history.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_generic.R 3 | \name{summary.history} 4 | \alias{summary.history} 5 | \alias{summary.History} 6 | \title{Summary Method for Contextual History} 7 | \usage{ 8 | \method{summary}{History}(object, ...) 9 | } 10 | \arguments{ 11 | \item{object}{A \code{History} object.} 12 | 13 | \item{...}{Further summary parameters.} 14 | } 15 | \description{ 16 | summary.history, a method for the summary generic. It is designed for a quick summary of History data. 17 | } 18 | \seealso{ 19 | Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, 20 | \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} 21 | 22 | Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, 23 | \code{\link{OfflineReplayEvaluatorBandit}} 24 | 25 | Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} 26 | } 27 | -------------------------------------------------------------------------------- /man/value_remaining.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{value_remaining} 4 | \alias{value_remaining} 5 | \title{Potential Value Remaining} 6 | \usage{ 7 | value_remaining(x, n, alpha = 1, beta = 1, ndraws = 10000) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of the number of successes per arm.} 11 | 12 | \item{n}{Vector of the number of trials per arm.} 13 | 14 | \item{alpha}{Shape parameter alpha for the prior beta distribution.} 15 | 16 | \item{beta}{Shape parameter beta for the prior beta distribution.} 17 | 18 | \item{ndraws}{Number of random draws from the posterior.} 19 | } 20 | \value{ 21 | Value_remaining distribution; the distribution of 22 | improvement amounts that another arm might have over the current best arm. 23 | } 24 | \description{ 25 | Compute "value_remaining" in arms not 26 | currently best in binomial bandits 27 | } 28 | \examples{ 29 | 30 | x <- c(10,20,30,80) 31 | n <- c(100,102,120,240) 32 | vr <- value_remaining(x, n) 33 | hist(vr) 34 | 35 | # "potential value" remaining in the experiment 36 | potential_value <- quantile(vr, 0.95) 37 | 38 | } 39 | \author{ 40 | Thomas Lotze and Markus Loecher 41 | } 42 | -------------------------------------------------------------------------------- /man/var_welford.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{var_welford} 4 | \alias{var_welford} 5 | \title{Welford's variance} 6 | \usage{ 7 | var_welford(z) 8 | } 9 | \arguments{ 10 | \item{z}{vector} 11 | } 12 | \value{ 13 | variance 14 | } 15 | \description{ 16 | Welford described a method for 'robust' one-pass computation of the 17 | standard deviation. By 'robust', we mean robust to round-off caused 18 | by a large shift in the mean. 
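A minimal check with illustrative values (the package's unit tests exercise \code{var_welford} on a shuffled \code{1:20} in the same spirit):

\preformatted{
z <- c(4, 7, 13, 16)
var_welford(z)   # one-pass sample variance
var(z)           # base R two-pass variance; the two should agree up to round-off
}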
19 | } 20 | -------------------------------------------------------------------------------- /man/which_max_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{which_max_list} 4 | \alias{which_max_list} 5 | \title{Get maximum value in list} 6 | \usage{ 7 | which_max_list(x, equal_is_random = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{vector of values} 11 | 12 | \item{equal_is_random}{boolean} 13 | } 14 | \description{ 15 | Returns the index of the maximum value in list \code{x}. 16 | } 17 | \details{ 18 | If there is a tie and \code{equal_is_random} is \code{TRUE}, 19 | the index of one of the tied maxima is returned at random. 20 | 21 | If \code{equal_is_random} is \code{FALSE}, 22 | the maximum with the lowest index number is returned. 23 | } 24 | \examples{ 25 | 26 | theta = list(par_one = list(1,2,3), par_two = list(2,3,4)) 27 | which_max_list(theta$par_one) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /man/which_max_tied.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/functions_utility.R 3 | \name{which_max_tied} 4 | \alias{which_max_tied} 5 | \title{Get maximum value randomly breaking ties} 6 | \usage{ 7 | which_max_tied(x, equal_is_random = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{vector of values} 11 | 12 | \item{equal_is_random}{boolean} 13 | } 14 | \description{ 15 | Returns the index of the maximum value in vector \code{vec}. 16 | } 17 | \details{ 18 | If there is a tie, and equal_is_random is TRUE, 19 | the index of one of the tied maxima is returned at random. Otherwise, 20 | the value with the lowest index is returned. 
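For example, with illustrative values:

\preformatted{
x <- c(1, 3, 3)
which_max_tied(x)                            # index 2 or 3, chosen at random
which_max_tied(x, equal_is_random = FALSE)   # always index 2, the lowest tied index
}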
21 | } 22 | -------------------------------------------------------------------------------- /tests/figs/deps.txt: -------------------------------------------------------------------------------- 1 | - vdiffr-svg-engine: 1.0 2 | - vdiffr: 0.3.2.2 3 | - freetypeharfbuzz: 0.2.5 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | 3 | library(testthat) 4 | library(contextual) 5 | 6 | test_check("contextual") 7 | -------------------------------------------------------------------------------- /tests/testthat/setup_tests.R: -------------------------------------------------------------------------------- 1 | suppressWarnings(RNGversion("3.5.0")) -------------------------------------------------------------------------------- /tests/testthat/teardown_tests.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | delete_files <- dir(path=".", pattern="*.csv") 4 | file.remove(delete_files) 5 | 6 | delete_files <- dir(path=".", pattern="*.pdf") 7 | file.remove(delete_files) 8 | 9 | delete_files <- dir(path=".", pattern="*.svg") 10 | file.remove(delete_files) 11 | 12 | delete_files <- dir(path=".", pattern="*.log") 13 | file.remove(delete_files) 14 | -------------------------------------------------------------------------------- /tests/testthat/test_agent.R: -------------------------------------------------------------------------------- 1 | context("Agent") 2 | 3 | test_that("Agent", { 4 | 5 | policy <- EpsilonGreedyPolicy$new(epsilon = 0.1) 6 | expect_identical(typeof(policy), "environment") 7 | 8 | bandit <- BasicBernoulliBandit$new(c(0.6, 0.1, 0.1)) 9 | expect_identical(typeof(bandit), "environment") 10 | 11 | agent <- Agent$new(policy, bandit, name = "testme", sparse = 0.5) 12 | expect_identical(typeof(agent), "environment") 13 | expect_equal(agent$name, "testme") 14 | expect_equal(agent$sparse, 0.5) 15 | expect_equal(agent$bandit$d, 1) 16 | expect_equal(agent$bandit$k, 3) 17 | expect_equal(agent$policy$class_name, "EpsilonGreedyPolicy") 18 | expect_equal(agent$policy$epsilon, 0.1) 19 | expect_equal(agent$policy$theta$mean[[1]], 0) 20 | 21 | history <- Simulator$new(agents = agent, 22 | horizon = 10, 23 | simulations = 10, 24 | do_parallel = FALSE, 25 | log_interval = 1, 26 | progress_file = TRUE)$run() 27 | 28 | expect_identical(history$cumulative$testme$reward,0.4) 29 | 30 | t <- agent$get_t() 31 | agent$set_t(t+1) 32 | t <- agent$get_t() 33 | 34 | expect_identical(agent$get_t(),1) 35 | 36 | Sys.sleep(0.1) 37 | expect_true(file.exists("parallel.log")) 38 | expect_true(file.exists("workers_progress.log")) 39 | expect_true(file.exists("agents_progress.log")) 40 | if (file.exists("workers_progress.log")) file.remove("workers_progress.log") 41 | if (file.exists("agents_progress.log")) file.remove("agents_progress.log") 42 | if (file.exists("progress.log")) file.remove("progress.log") 43 | 44 | }) 45 | -------------------------------------------------------------------------------- /tests/testthat/test_policies.R: -------------------------------------------------------------------------------- 1 | context("Policies") 2 | 3 | test_that("ContextualLogitBTSPolicy simulation", { 4 | 5 | horizon <- 20L 6 | simulations <- 10L 7 | 8 | bandit <- ContextualLinearBandit$new(k = 5, d = 5, binary_rewards = TRUE) 9 | 10 | agents <-list( 11 | Agent$new(ContextualLogitBTSPolicy$new(), bandit) 12 | ) 13 | 14 | 
simulation <- Simulator$new(agents, horizon, simulations, do_parallel = FALSE) 15 | history <- simulation$run() 16 | 17 | expect_equal(history$cumulative$ContextualLogitBTS$cum_reward, 6.2, tolerance = 0.01) 18 | expect_equal(history$cumulative$ContextualLogitBTS$cum_regret, 11.6, tolerance = 0.01) 19 | 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test_policy.R: -------------------------------------------------------------------------------- 1 | context("Policy") 2 | 3 | test_that("Policy", { 4 | 5 | policy <- Policy$new() 6 | expect_identical(typeof(policy), "environment") 7 | 8 | policy$theta_to_arms <- list(n=3) 9 | theta <- policy$initialize_theta(4) 10 | expect_identical(theta$n[[4]], 3) 11 | 12 | expect_identical(policy$class_name, "Policy") 13 | expect_error(policy$get_action(), "has not been implemented") 14 | expect_error(policy$set_reward(), "has not been implemented") 15 | 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test_utility_functions.R: -------------------------------------------------------------------------------- 1 | context("Utility Functions") 2 | 3 | test_that("Inc and Dec", { 4 | 5 | x <- 1:5 6 | inc(x) <- 5 7 | expect_equal(6:10, x) 8 | 9 | dec(x) <- 5 10 | expect_equal(1:5, x) 11 | 12 | }) 13 | 14 | test_that("which_max_list", { 15 | set.seed(1) 16 | 17 | theta = list(par_one = list(1,2,2), par_two = list(2,3,4)) 18 | expect_equal(which_max_list(theta$par_one, equal_is_random = TRUE), 2) 19 | 20 | theta = list(par_one = list(1,2,2), par_two = list(2,3,4)) 21 | expect_equal(which_max_list(theta$par_one, equal_is_random = FALSE), 2) 22 | }) 23 | 24 | 25 | test_that("Welford", { 26 | set.seed(42) 27 | v <- sample(20) 28 | s <- var_welford(v) 29 | expect_equal(s,35) 30 | }) 31 | 32 | test_that("Formatted difftime", { 33 | ft <- formatted_difftime(difftime(strftime ("2019-10-18 13:35:35 CEST"), 34 | strftime ("2018-09-17 12:31:30 CEST"))) 35 | expect_equal(ft,"396 days, 1:04:05") 36 | }) 37 | 38 | test_that("Inverse Logit", { 39 | expect_equal(invlogit(10),0.9999546, tolerance = 0.002) 40 | }) 41 | 42 | test_that("InvGamma", { 43 | 44 | s <- seq(0, 5, .01) 45 | x <- dinvgamma(s, 7, 10) 46 | x2 <- dinvgamma(s, 7, scale = 0.10) 47 | 48 | expect_equal_to_reference(x, file = "dinvgamma1.rds") 49 | expect_equal_to_reference(x2, file = "dinvgamma2.rds") 50 | 51 | x2 <- dinvgamma(s, 7, 10, log = TRUE) 52 | expect_equal_to_reference(x2, file = "logdiv.rds") 53 | 54 | q <- 2 55 | (p <- pinvgamma(q, 7, 10)) 56 | expect_equal(qinvgamma(p, 7, 10), q) 57 | 58 | q <- 2 59 | (p <- pinvgamma(q, 7, scale = 0.10)) 60 | expect_equal(qinvgamma(p, 7, scale = 0.10), q) 61 | 62 | expect_equal(mean(rinvgamma(1e5, 7, 10) <= q),0.76088, tolerance = 0.002) 63 | 64 | expect_equal(mean(rinvgamma(1e5, 7, scale = 0.10) <= q),0.763, tolerance = 0.02) 65 | 66 | }) 67 | 68 | -------------------------------------------------------------------------------- /vignettes/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/1.png -------------------------------------------------------------------------------- /vignettes/Rplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/Rplot.png 
-------------------------------------------------------------------------------- /vignettes/basic_epsilon_greedy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/basic_epsilon_greedy.png -------------------------------------------------------------------------------- /vignettes/carskit_depaul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/carskit_depaul.png -------------------------------------------------------------------------------- /vignettes/cmabs.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE, cache = TRUE-------------------------------- 2 | knitr::opts_chunk$set( 3 | collapse = TRUE, 4 | comment = "#>" 5 | ) 6 | 7 | -------------------------------------------------------------------------------- /vignettes/cmabs.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Demo: Basic Synthetic cMAB Policies" 3 | author: "Robin van Emden" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Demo: Basic Synthetic cMAB Policies} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE, cache = TRUE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | ```r 20 | library(contextual) 21 | 22 | horizon <- 100L 23 | simulations <- 100L 24 | 25 | bandit <- ContextualLinearBandit$new(k = 4, d = 3, sigma = 0.3) 26 | 27 | # Linear CMAB policies comparison 28 | 29 | agents <- list(Agent$new(EpsilonGreedyPolicy$new(0.1), bandit, "EGreedy"), 30 | Agent$new(ContextualEpsilonGreedyPolicy$new(0.1), bandit, "cEGreedy"), 31 | Agent$new(ContextualLinTSPolicy$new(0.1), bandit, "LinTS"), 32 | Agent$new(LinUCBDisjointOptimizedPolicy$new(0.6), bandit, "LinUCB")) 33 | 34 | simulation <- Simulator$new(agents, horizon, simulations, do_parallel = TRUE) 35 | 36 | history <- simulation$run() 37 | 38 | plot(history, type = "cumulative", rate = FALSE, legend_position = "topleft") 39 | 40 | ``` 41 | 42 | ![](cmabs.png) 43 | -------------------------------------------------------------------------------- /vignettes/cmabs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/cmabs.png -------------------------------------------------------------------------------- /vignettes/cmabsoffline.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE, cache = TRUE-------------------------------- 2 | knitr::opts_chunk$set( 3 | collapse = TRUE, 4 | comment = "#>" 5 | ) 6 | 7 | -------------------------------------------------------------------------------- /vignettes/cmabsoffline.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Demo: Offline cMAB LinUCB evaluation" 3 | author: "Robin van Emden" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Demo: Offline cMAB LinUCB evaluation} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | 
%\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE, cache = TRUE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | ```r 20 | library(contextual) 21 | library(data.table) 22 | 23 | # Import personalization data-set 24 | 25 | library(contextual); library(data.table) 26 | 27 | dt <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_cmab_basic/data.txt") 28 | # 0/1 reward, 10 arms, 100 features 29 | # arms always start from 1 30 | 31 | # z y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 .. x100 32 | # 1: 2 0 5 0 0 37 6 0 0 0 0 25 0 0 7 1 0 .. 0 33 | # 2: 8 0 1 3 36 0 0 0 0 0 0 0 0 1 0 0 0 .. 10 34 | # 3: . . . . . . . . . . . . . . . . . .. . 35 | 36 | horizon <- nrow(dt) 37 | simulations <- 1 38 | 39 | # Set up formula: y ~ z | x1 + x2 + .. 40 | # In bandit parlance: reward ~ arms | covariates or contextual features 41 | 42 | f <- y ~ z | . - z 43 | 44 | # Instantiate Replay Bandit (Li, 2010) 45 | bandit <- OfflineReplayEvaluatorBandit$new(formula = f, data = dt) 46 | 47 | # Bind Policies withs Bandits through Agents, add Agents to list 48 | agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.01), bandit, "alpha = 0.01"), 49 | Agent$new(LinUCBDisjointOptimizedPolicy$new(0.05), bandit, "alpha = 0.05"), 50 | Agent$new(LinUCBDisjointOptimizedPolicy$new(0.1), bandit, "alpha = 0.1"), 51 | Agent$new(LinUCBDisjointOptimizedPolicy$new(1.0), bandit, "alpha = 1.0")) 52 | 53 | # Instantiate a Simulator 54 | simulation <- Simulator$new(agents, horizon = nrow(dt), simulations = 1) 55 | 56 | # Run the simulation. 57 | history <- simulation$run() 58 | 59 | # plot the results 60 | plot(history, type = "cumulative", regret = FALSE, rate = TRUE, 61 | legend_position = "bottomright", ylim = c(0,1)) 62 | ``` 63 | 64 | ![](linucboffline.png) 65 | -------------------------------------------------------------------------------- /vignettes/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/compare.png -------------------------------------------------------------------------------- /vignettes/contextual-fig-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/contextual-fig-1.png -------------------------------------------------------------------------------- /vignettes/contextual-fig-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/contextual-fig-2.png -------------------------------------------------------------------------------- /vignettes/eckles_kaptein.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE, cache = TRUE-------------------------------- 2 | knitr::opts_chunk$set( 3 | collapse = TRUE, 4 | comment = "#>" 5 | ) 6 | 7 | -------------------------------------------------------------------------------- /vignettes/eckles_kaptein.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Demo: MAB Replication Eckles & Kaptein (Bootstrap Thompson Sampling)" 3 | author: "Robin van Emden" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: 
> 7 | %\VignetteIndexEntry{Demo: MAB Replication Eckles & Kaptein (Bootstrap Thompson Sampling)} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE, cache = TRUE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | ```r 20 | library(contextual) 21 | 22 | # Replication of THOMPSON SAMPLING WITH THE ONLINE BOOTSTRAP By Dean Eckles and Maurits Kaptein 23 | 24 | # This evaluations takes time - up to a few hours when run single core. 25 | 26 | # Running the script in parallel (for example, on 8 cores) 27 | # shortens the evaluation time substantially. 28 | 29 | # https://arxiv.org/abs/1410.4009 30 | 31 | # Fig 2. Empirical regret for Thompson sampling and BTS in a K-armed binomial bandit problem. 32 | 33 | bandit <- BasicBernoulliBandit$new(weights = c(0.5, rep(0.4,9))) 34 | 35 | agents <- list(Agent$new(BootstrapTSPolicy$new(1000), bandit, "BTS 1000"), 36 | Agent$new(ThompsonSamplingPolicy$new(), bandit, "TS")) 37 | 38 | simulator <- Simulator$new(agents = agents, 39 | do_parallel = TRUE, 40 | save_interval = 50, 41 | set_seed = 999, 42 | horizon = 1e+05, 43 | simulations = 1000) 44 | 45 | simulator$run() 46 | 47 | plot(simulator$history, log = "x") 48 | 49 | ``` 50 | 51 | ![](eckles_kaptein_1.png) 52 | -------------------------------------------------------------------------------- /vignettes/eckles_kaptein_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eckles_kaptein_0.png -------------------------------------------------------------------------------- /vignettes/eckles_kaptein_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eckles_kaptein_1.png -------------------------------------------------------------------------------- /vignettes/eg_average_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eg_average_reward.png -------------------------------------------------------------------------------- /vignettes/eg_cumulative_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eg_cumulative_reward.png -------------------------------------------------------------------------------- /vignettes/eg_incorrect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eg_incorrect.png -------------------------------------------------------------------------------- /vignettes/eg_optimal_action.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/eg_optimal_action.png -------------------------------------------------------------------------------- /vignettes/epsilongreedy.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE, cache = 
TRUE-------------------------------- 2 | knitr::opts_chunk$set( 3 | collapse = TRUE, 4 | comment = "#>" 5 | ) 6 | 7 | -------------------------------------------------------------------------------- /vignettes/epsilongreedy.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Demo: Basic Epsilon Greedy" 3 | author: "Robin van Emden" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Demo: Basic Epsilon Greed} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE, cache = TRUE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | ```r 20 | library(contextual) 21 | 22 | policy <- EpsilonGreedyPolicy$new(epsilon = 0.1) 23 | 24 | bandit <- BasicBernoulliBandit$new(weights = c(0.6, 0.1, 0.1)) 25 | agent <- Agent$new(policy,bandit) 26 | 27 | simulator <- Simulator$new(agents = agent, 28 | horizon = 100, 29 | simulations = 1000) 30 | 31 | history <- simulator$run() 32 | 33 | plot(history, type = "cumulative", regret = TRUE, disp = "ci", 34 | traces_max = 100, traces_alpha = 0.1, traces = TRUE) 35 | ``` 36 | 37 | ![](basic_epsilon_greedy.png) 38 | 39 | ```r 40 | summary(history) 41 | 42 | ``` 43 | 44 | ```r 45 | Agents: 46 | 47 | EpsilonGreedy 48 | 49 | Cumulative regret: 50 | 51 | agent t sims cum_regret cum_regret_var cum_regret_sd 52 | EpsilonGreedy 100 1000 8.951 116.7133 10.80339 53 | 54 | 55 | Cumulative reward: 56 | 57 | agent t sims cum_reward cum_reward_var cum_reward_sd 58 | EpsilonGreedy 100 1000 51.09 141.6215 11.90048 59 | 60 | 61 | Cumulative reward rate: 62 | 63 | agent t sims cur_reward cur_reward_var cur_reward_sd 64 | EpsilonGreedy 100 1000 0.5109 1.416215 0.1190048 65 | ``` 66 | -------------------------------------------------------------------------------- /vignettes/introduction.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE, cache = TRUE------------------------------------- 2 | knitr::opts_chunk$set( 3 | collapse = TRUE, 4 | comment = "#>" 5 | ) 6 | 7 | -------------------------------------------------------------------------------- /vignettes/linucboffline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/linucboffline.png -------------------------------------------------------------------------------- /vignettes/mabs.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Demo: MAB Policies Comparison" 3 | author: "Robin van Emden" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Demo: MAB Policies Comparison} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE, cache = TRUE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | ```r 20 | library(contextual) 21 | 22 | prob_per_arm <- c(0.9, 0.1, 0.1) 23 | horizon <- 100 24 | simulations <- 1000 25 | 26 | bandit <- BasicBernoulliBandit$new(prob_per_arm) 27 | 28 | agents <- list(Agent$new(OraclePolicy$new(), bandit), 29 | Agent$new(EpsilonGreedyPolicy$new(0.1), bandit), 30 | Agent$new(ThompsonSamplingPolicy$new(1.0, 1.0), bandit), 31 | Agent$new(Exp3Policy$new(0.1), bandit), 32 | 
               Agent$new(GittinsBrezziLaiPolicy$new(), bandit),
               Agent$new(UCB1Policy$new(), bandit),
               Agent$new(UCB2Policy$new(0.1), bandit))

simulation <- Simulator$new(agents, horizon, simulations)
history    <- simulation$run()

plot(history, type = "cumulative")
```

![](mabs.png)
--------------------------------------------------------------------------------
/vignettes/mabs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/mabs.png
--------------------------------------------------------------------------------
/vignettes/ml10m.R:
--------------------------------------------------------------------------------
## ----setup, include = FALSE, cache = TRUE--------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

--------------------------------------------------------------------------------
/vignettes/ml10m.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/ml10m.png
--------------------------------------------------------------------------------
/vignettes/offline_depaul_movies.R:
--------------------------------------------------------------------------------
## ----setup, include = FALSE, cache = TRUE--------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

--------------------------------------------------------------------------------
/vignettes/offline_depaul_movies.Rmd:
--------------------------------------------------------------------------------
---
title: "Demo: Offline cMAB: CarsKit DePaul Movie Dataset"
author: "Robin van Emden"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Demo: Offline cMAB: CarsKit DePaul Movie Dataset}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE, cache = TRUE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```r
library(contextual)
library(data.table)

# Import the personalization dataset

# Info: https://d1ie9wlkzugsxr.cloudfront.net/data_irecsys_CARSKit/Movie_DePaulMovie/README.txt

url  <- "http://d1ie9wlkzugsxr.cloudfront.net/data_irecsys_CARSKit/Movie_DePaulMovie/ratings.csv"
data <- fread(url, stringsAsFactors = TRUE)

# Convert the data

data <- contextual::one_hot(data, cols = c("Time", "Location", "Companion"), sparsifyNAs = TRUE)
data[, itemid := as.numeric(itemid)]
data[, rating := ifelse(rating <= 3, 0, 1)]

# Set simulation parameters.
simulations <- 10          # here, "simulations" represents the number of bootstrap samples
horizon     <- nrow(data)

# Initialize the Replay bandit with 10 arms and 100 context dimensions.
# Arm indices always start at 1.
log_S   <- data
formula <- formula("rating ~ itemid | Time_Weekday + Time_Weekend + Location_Cinema +
                              Location_Home + Companion_Alone + Companion_Family +
                              Companion_Partner")
bandit  <- OfflineBootstrappedReplayBandit$new(formula = formula, data = data)

# Define agents.
agents <-
  list(Agent$new(RandomPolicy$new(), bandit, "Random"),
       Agent$new(EpsilonGreedyPolicy$new(0.03), bandit, "EGreedy 0.03"),
       Agent$new(ThompsonSamplingPolicy$new(), bandit, "ThompsonSampling"),
       Agent$new(LinUCBDisjointOptimizedPolicy$new(0.37), bandit, "LinUCB 0.37"))

# Initialize the simulation.
simulation <-
  Simulator$new(
    agents      = agents,
    simulations = simulations,
    horizon     = horizon
  )

# Run the simulation.
# Takes about 5 minutes: the bootstrapped replay bandit loops through
# arms x horizon x simulations (times the number of agents).
sim <- simulation$run()

# Plot the results.
plot(sim, type = "cumulative", regret = FALSE, rate = TRUE,
     legend_position = "topleft", ylim = c(0.48, 0.87))

```

![](carskit_depaul.png)

--------------------------------------------------------------------------------
/vignettes/replication-fig-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/replication-fig-1.png
--------------------------------------------------------------------------------
/vignettes/replication-fig-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/replication-fig-2.png
--------------------------------------------------------------------------------
/vignettes/replication.R:
--------------------------------------------------------------------------------
## ----setup, include = FALSE, cache = TRUE--------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

--------------------------------------------------------------------------------
/vignettes/simpsons.R:
--------------------------------------------------------------------------------
## ----setup, include = FALSE, cache = TRUE-------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

--------------------------------------------------------------------------------
/vignettes/softmax_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/softmax_average_reward.png
--------------------------------------------------------------------------------
/vignettes/softmax_cumulative_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/softmax_cumulative_reward.png
--------------------------------------------------------------------------------
/vignettes/softmax_optimal_action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/softmax_optimal_action.png
--------------------------------------------------------------------------------
/vignettes/sutton_barto.R:
--------------------------------------------------------------------------------
## ----setup, include = FALSE, cache = TRUE--------------------------------
knitr::opts_chunk$set(
  fig.pos = 'H',
  collapse = TRUE,
  comment = "#>"
)

--------------------------------------------------------------------------------
/vignettes/sutton_eg_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_eg_1.png
--------------------------------------------------------------------------------
/vignettes/sutton_eg_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_eg_2.png
--------------------------------------------------------------------------------
/vignettes/sutton_gradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_gradient.png
--------------------------------------------------------------------------------
/vignettes/sutton_optimistic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_optimistic.png
--------------------------------------------------------------------------------
/vignettes/sutton_ucb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_ucb.png
--------------------------------------------------------------------------------
/vignettes/sutton_violin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/sutton_violin.png
--------------------------------------------------------------------------------
/vignettes/ucb_average_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/ucb_average_reward.png
--------------------------------------------------------------------------------
/vignettes/ucb_cumulative_reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/ucb_cumulative_reward.png
--------------------------------------------------------------------------------
/vignettes/ucb_optimal_action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nth-iteration-labs/contextual/2db12ce5cd9ec3c4b0bf14d6ddd9a2ac785b30f3/vignettes/ucb_optimal_action.png
--------------------------------------------------------------------------------
/vignettes/website_optimization.R:
--------------------------------------------------------------------------------
## ----setup, include = FALSE, cache = TRUE--------------------------------
knitr::opts_chunk$set(
  fig.pos = 'H',
  collapse = TRUE,
  comment = "#>"
)

--------------------------------------------------------------------------------
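The Eckles & Kaptein vignette above notes that a single-core run can take hours, while running in parallel (for example, on 8 cores) shortens the evaluation time substantially. The sketch below shows one possible way to keep such a run manageable during testing. It reuses only classes that already appear in the vignettes; the `worker_max` argument and the reduced `horizon` and `simulations` values are assumptions added for illustration and are not part of the original vignette, so check the Simulator documentation before relying on them.

```r
library(contextual)

# Same bandit and agents as in the Eckles & Kaptein vignette above:
# one arm with success probability 0.5 and nine arms with 0.4.
bandit <- BasicBernoulliBandit$new(weights = c(0.5, rep(0.4, 9)))

agents <- list(Agent$new(BootstrapTSPolicy$new(1000), bandit, "BTS 1000"),
               Agent$new(ThompsonSamplingPolicy$new(), bandit, "TS"))

# Assumed arguments: worker_max caps the number of parallel workers, and
# horizon/simulations are reduced here so a test run finishes quickly.
simulator <- Simulator$new(agents      = agents,
                           do_parallel = TRUE,
                           worker_max  = 8,
                           set_seed    = 999,
                           horizon     = 1e+04,
                           simulations = 100)

history <- simulator$run()

plot(history, log = "x")
```

Once such a reduced run looks sensible, the horizon and number of simulations can be restored to 1e+05 and 1000 to reproduce the figure from the paper.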