├── .Rhistory ├── NAMESPACE ├── README~ ├── MAB.Rproj ├── DESCRIPTION ├── README.md ├── test ├── run_tests.R └── test_MAB.R ├── CONTRIBUTING.md ├── man ├── CalculateWeight.Rd ├── SimulateMultipleMethods.Rd └── SimulateMultiplePeriods.Rd ├── LICENSE └── R ├── CalculateWeight.R ├── CalculateWeight.R~ ├── SimulateMultipleMethods.R ├── SimulateMultipleMethods.R~ ├── SimulateMultiplePeriods.R └── SimulateMultiplePeriods.R~ /.Rhistory: -------------------------------------------------------------------------------- 1 | install_github("google/MAB") 2 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(CalculateWeight) 4 | export(SimulateMultipleMethods) 5 | export(SimulateMultiplePeriods) 6 | -------------------------------------------------------------------------------- /README~: -------------------------------------------------------------------------------- 1 | This is not an official Google product. 
2 | 3 | To install MAB package, download the package and run the following code in 4 | command line: 5 | R CMD INSTALL FILE.PATH 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /MAB.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | Encoding: UTF-8 9 | 10 | AutoAppendNewline: Yes 11 | StripTrailingWhitespace: Yes 12 | 13 | BuildType: Package 14 | PackageUseDevtools: Yes 15 | PackageInstallArgs: --no-multiarch --with-keep.source 16 | PackageRoxygenize: rd,collate,namespace 17 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: MAB 2 | Title: Multi-Armed Bandit Strategies Implementation and Simulation 3 | Version: 1.0.0 4 | Authors@R: c( 5 | person("Yunbo", "Ouyang", email = "youyang4@illinois.edu", role = c("aut")), 6 | person("Shuchao", "Bi", email = "shuchaobi@google.com", role = c("aut", "cre")), 7 | person("Zoey", "Chu", email = "chuwanghuan@google.com", role = c("aut")), 8 | person("Chunqiu", "Zeng", email = "chunqiuzeng@google.com", role = c("aut"))) 9 | Description: Implement various multi-armed bandit strategies and 10 | conduct comparison studies via simulation. 11 | Depends: 12 | R (>= 3.2.2), ggplot2, emre, assertthat 13 | License: Apache License 2.0 14 | Encoding: UTF-8 15 | LazyData: true 16 | RoxygenNote: 5.0.1 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MAB 2 | 3 | R package MAB is created to 4 | implement strategies for stationary and non-stationary multi-armed bandit problems. 
5 | Various widely-used strategies and their ensembles are included in this package. 6 | This package is designed to compare different strategies in multi-armed bandit problems 7 | and help users to choose suitable strategies with suitable tuning parameters 8 | in different scenarios. This is not an official Google product. 9 | 10 | 11 | Install the Package 12 | ------------------- 13 | 14 | MAB depends on R package [emre](https://github.com/google/emre). To install MAB package, download the package and run the following code in 15 | command line: 16 | 17 | R CMD INSTALL FILE.PATH 18 | 19 | Another way is to install devtools package first and then run the following code 20 | in R: 21 | 22 | library(devtools) 23 | 24 | install_github("google/MAB") 25 | 26 | 27 | -------------------------------------------------------------------------------- /test/run_tests.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | library(testthat) 15 | 16 | source("/google/src/cloud/youyang/rpackage/google3/experimental/users/youyang/MAB/R/CalculateWeight.R") 17 | 18 | test_results <- test_dir("/google/src/cloud/youyang/rpackage/google3/experimental/users/youyang/MAB/test", 19 | reporter="summary") 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution, 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | -------------------------------------------------------------------------------- /test/test_MAB.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | test_that("Test Epsilon-Greedy",{ 15 | expect_equal(CalculateWeight(method = "Epsilon-Greedy", 16 | method.par = list(epsilon = 0.01), 17 | all.event = data.frame(reward = 1:3, 18 | trial = rep(10, 3)), 19 | reward.family = "Bernoulli"), 20 | c(0.005, 0.005, 0.990))}) 21 | test_that("Test Epsilon-Decreasing",{ 22 | expect_equal(CalculateWeight(method = "Epsilon-Decreasing", 23 | method.par = list(epsilon = 0.01), 24 | all.event = data.frame(reward = 1:3, 25 | trial = rep(10, 3)), 26 | reward.family = "Bernoulli"), 27 | c(0.005, 0.005, 0.990))}) 28 | test_that("Test UCB",{ 29 | expect_equal(CalculateWeight(method = "UCB", 30 | method.par = list(epsilon = 0.01), 31 | all.event = data.frame(reward = 1:3, 32 | trial = rep(10, 3)), 33 | reward.family = "Bernoulli"), 34 | c(0, 0, 1))}) 35 | -------------------------------------------------------------------------------- /man/CalculateWeight.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CalculateWeight.R 3 | \name{CalculateWeight} 4 | \alias{CalculateWeight} 5 | \title{Calculate the probability of pulling each arm in the next period for various 6 | strategies} 7 | \usage{ 8 | CalculateWeight(method = "Thompson-Sampling", method.par = list(ndraws.TS = 9 | 1000), all.event, reward.family, sd.reward = NULL, period = 1, 10 | EXP3Info = NULL) 11 | } 12 | \arguments{ 13 | \item{method}{A character string choosing from "Epsilon-Greedy", 14 | 
"Epsilon-Decreasing", "Thompson-Sampling", 15 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 16 | "EXP3-Thompson-Sampling", 17 | "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 18 | See \code{\link{SimulateMultiplePeriods}} for more details. 19 | Default is "Thompson-Sampling".} 20 | 21 | \item{method.par}{A list of parameters needed for different methods: 22 | 23 | \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 24 | "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 25 | "Greedy-Bayes-Poisson-TS". 26 | 27 | \code{ndraws.TS}: A positive integer specifying the number of random draws 28 | from the posterior; 29 | needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 30 | "EXP3-Thompson-Sampling". Default is 1000. 31 | 32 | \code{EXP3}: A list consisting of two real numbers \code{eta} and 33 | \code{gamma}; 34 | \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 35 | "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 36 | 37 | \code{BP}: A list consisting of three postive integers \code{iter.BP}, 38 | \code{ndraws.BP} and \code{interval.BP}; 39 | needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 40 | "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 41 | to compute posterior; 42 | \code{ndraws.BP} specifies the number of posterior samples drawn from 43 | posterior distribution; \code{interval.BP} is specified to draw each 44 | posterior sample from 45 | a sample sequence of length \code{interval.BP}.} 46 | 47 | \item{all.event}{A data frame containing two columns \code{trial} and 48 | \code{reward} with the number of rows equal to the number of arms. 49 | Each element of \code{trial} and \code{reward} represents the number of 50 | trials and the total reward for each arm respectively.} 51 | 52 | \item{reward.family}{A character string specifying the distribution family 53 | of reward. 
Available distribution includes 54 | "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 55 | reward distribution, 56 | a vector of standard deviation should be provided in \code{sd.reward}.} 57 | 58 | \item{sd.reward}{A vector of non-negative numbers specifying standard 59 | deviation of each arm's reward distribution if "Gaussian" is chosen to be 60 | the reward distribution. Default to be NULL. 61 | See \code{reward.family}.} 62 | 63 | \item{period}{A positive integer specifying the period index. 64 | Default to be 1.} 65 | 66 | \item{EXP3Info}{A list of three vectors \code{prevWeight}, \code{EXP3Trial} 67 | and \code{EXP3Reward} with dimension equal to the number of arms, 68 | needed for "EXP3", "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS": 69 | 70 | \code{prevWeight}: the weight vector in the previous EXP3 iteration. 71 | 72 | \code{EXP3Trial} and \code{EXP3Reward}: vectors representing 73 | the number of trials and the total reward for each arm in 74 | the previous period respectively. 75 | 76 | See \code{\link{SimulateMultiplePeriods}} for more details.} 77 | } 78 | \value{ 79 | A normalized weight vector for future randomized allocation. 80 | } 81 | \description{ 82 | This function is aimed to compute the probability of pulling each arm for 83 | various methods in Multi-Armed Bandit given the total reward and the number 84 | of trials for each arm. 85 | } 86 | \examples{ 87 | ### Calculate weights using Thompson Sampling if reward follows Poisson 88 | distribution. 
89 | set.seed(100) 90 | CalculateWeight(method = "Thompson-Sampling", 91 | method.par = list(ndraws.TS = 1000), 92 | all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 93 | reward.family = "Poisson") 94 | ### Calculate weights using EXP3 95 | CalculateWeight(method = "EXP3", 96 | method.par = list(EXP3 = list(gamma = 0.01, eta =0.1)), 97 | all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 98 | reward.family = "Bernoulli", 99 | EXP3Info = list(prevWeight = rep(1, 3), 100 | EXP3Trial = rep(5, 3), 101 | EXP3Reward = 0:2)) 102 | } 103 | 104 | -------------------------------------------------------------------------------- /man/SimulateMultipleMethods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SimulateMultipleMethods.R 3 | \name{SimulateMultipleMethods} 4 | \alias{SimulateMultipleMethods} 5 | \title{Compare various strategies for Multi-Armed Bandit in stationary 6 | and non-stationary scenarios} 7 | \usage{ 8 | SimulateMultipleMethods(method = "Thompson-Sampling", 9 | method.par = list(ndraws.TS = 1000), iter, nburnin, nperiod, 10 | reward.mean.family, reward.family, narms.family, npulls.family, 11 | stationary = TRUE, nonstationary.type = NULL, data.par, 12 | regret.plot = FALSE) 13 | } 14 | \arguments{ 15 | \item{method}{A vector of character strings choosing from "Epsilon-Greedy", 16 | "Epsilon-Decreasing", "Thompson-Sampling", 17 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 18 | "EXP3-Thompson-Sampling", 19 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 20 | See \code{\link{SimulateMultiplePeriods}} for more details. 
21 | Default is "Thompson-Sampling".} 22 | 23 | \item{method.par}{A list of parameters needed for different methods: 24 | 25 | \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 26 | "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 27 | "Greedy-Bayes-Poisson-TS". 28 | 29 | \code{ndraws.TS}: A positive integer specifying 30 | the number of random draws from the posterior; 31 | needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" 32 | and "EXP3-Thompson-Sampling". Default is 1000. 33 | 34 | \code{EXP3}: A list consisting of two real numbers \code{eta} and 35 | \code{gamma}; 36 | \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 37 | "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 38 | 39 | \code{BP}: A list consisting of three postive integers \code{iter.BP}, 40 | \code{ndraws.BP} and \code{interval.BP}; 41 | needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" 42 | and "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number 43 | of iterations to compute posterior; 44 | \code{ndraws.BP} specifies the number of posterior samples drawn 45 | from posterior distribution; \code{interval.BP} is specified to 46 | draw each posterior sample from 47 | a sample sequence of length \code{interval.BP}. 48 | 49 | \code{HyperTS}: A list consisting of a vector \code{method.list}, 50 | needed for "HyperTS". \code{method.list} is a vector of character strings 51 | choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 52 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 53 | "EXP3-Thompson-Sampling", 54 | "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 
55 | "HyperTS" will construct an ensemble consisting all the methods 56 | in \code{method.list}.} 57 | 58 | \item{iter}{A positive integer specifying the number of iterations.} 59 | 60 | \item{nburnin}{A positive integer specifying the number of periods 61 | to allocate each arm equal traffic before applying any strategy.} 62 | 63 | \item{nperiod}{A positive integer specifying the number of periods 64 | to apply various strategies.} 65 | 66 | \item{reward.mean.family}{A character string specifying 67 | the distribution family to generate mean reward of each arm. 68 | Available distribution includes "Uniform", "Beta" and "Gaussian".} 69 | 70 | \item{reward.family}{A character string specifying the distribution family 71 | of reward. Available distribution includes 72 | "Bernoulli", "Poisson" and "Gaussian". 73 | If "Gaussian" is chosen to be the reward distribution, 74 | a vector of standard deviation should be provided in 75 | \code{sd.reward} in \code{data.par}.} 76 | 77 | \item{narms.family}{A character string specifying the distribution family 78 | of the number of arms. Available distribution includes "Poisson" and 79 | "Binomial".} 80 | 81 | \item{npulls.family}{A character string specifying the distribution family 82 | of the number of pulls per period. 83 | For continuous distribution, the number of pulls will be rounded up. 84 | Available distribution includes "Log-Normal" and "Poisson".} 85 | 86 | \item{stationary}{A logic value indicating whether a stationary 87 | Multi-Armed Bandit is considered (corresponding to the case that 88 | the reward mean is unchanged). Default to be TRUE.} 89 | 90 | \item{nonstationary.type}{A character string indicating 91 | how the mean reward varies. Available types include "Random Walk" and 92 | "Geometric Random Walk" 93 | (reward mean follows random walk in the log scale). 
Default to be NULL.} 94 | 95 | \item{data.par}{A list of data generating parameters: 96 | 97 | \code{reward.mean}: A list of parameters of \code{reward.mean.family}: 98 | \code{min} and \code{max} are two real numbers specifying 99 | the bounds when \eqn{reward.mean.family = "Uniform"}; \code{shape1} and 100 | \code{shape2} are two shape parameters when 101 | \eqn{reward.mean.family = "Beta"}; 102 | \code{mean} and \code{sd} specify mean and standard deviation 103 | when \eqn{reward.mean.family = "Gaussian"}. 104 | 105 | \code{reward.family}: A list of parameters of \code{reward.family}: 106 | \code{sd} is a vector of non-negative numbers specifying standard deviation 107 | of each arm's reward distribution 108 | if "Gaussian" is chosen to be the reward distribution. 109 | 110 | \code{narms.family}: A list of parameters of \code{narms.family}: 111 | \code{lambda} is a positive parameter specifying the mean when 112 | \eqn{narms.family = "Poisson"}; \code{size} and \code{prob} 113 | are 2 parameters needed when \eqn{narms.family = "Binomial"}. 114 | 115 | \code{npulls.family}: A list of parameters of \code{npulls.family}: 116 | \code{meanlog} and \code{sdlog} are 2 positive parameters specifying the mean 117 | and standard deviation in the log scale 118 | when \eqn{npulls.family = "Log-Normal"}; 119 | \code{lambda} is a positive parameter 120 | specifying the mean when \eqn{npulls.family = "Poisson"}. 121 | 122 | \code{nonstationary.family}: 123 | A list of parameters of \code{nonstationary.type}: 124 | \code{sd} is a positive parameter specifying the standard deviation 125 | of white noise 126 | when \eqn{nonstationary.type = "Random Walk"}; \code{sdlog} is 127 | a positive parameter specifying the log standard deviation of white noise 128 | when \eqn{nonstationary.type = "Geometric Random Walk"}.} 129 | 130 | \item{regret.plot}{A logic value indicating whether an average regret plot 131 | is returned. 
Default to be FALSE.} 132 | } 133 | \value{ 134 | a list consisting of: 135 | \item{regret.matrix}{A three-dimensional array with each dimension corresponding to the period, iteration and method.} 136 | \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 137 | } 138 | \description{ 139 | This function is aimed to simulate data in different scenarios to 140 | compare various strategies in Multi-Armed Bandit. 141 | Users can specify the distribution of the number of arms, 142 | the distribution of mean reward, the distribution of the number of pulls 143 | in one period and the stationariness to simulate different scenarios. 144 | Relative regret is returned and average relative regret plot is returned 145 | if needed. 146 | See \code{\link{SimulateMultiplePeriods}} for more details. 147 | } 148 | \examples{ 149 | ### Compare Epsilon-Greedy and Thompson Sampling in the stationary case. 150 | set.seed(100) 151 | res <- SimulateMultipleMethods( 152 | method = c("Epsilon-Greedy", "Thompson-Sampling"), 153 | method.par = list(epsilon = 0.1, ndraws.TS = 1000), 154 | iter = 100, 155 | nburnin = 30, 156 | nperiod = 180, 157 | reward.mean.family = "Uniform", 158 | reward.family = "Bernoulli", 159 | narms.family = "Poisson", 160 | npulls.family = "Log-Normal", 161 | data.par = list(reward.mean = list(min = 0, max = 0.1), 162 | npulls.family = list(meanlog = 3, sdlog = 1.5), 163 | narms.family = list(lambda = 5)), 164 | regret.plot = TRUE) 165 | res$regret.plot.object 166 | ### Compare Epsilon-Greedy, Thompson Sampling and EXP3 in the non-stationary case. 
167 | set.seed(100) 168 | res <- SimulateMultipleMethods( 169 | method = c("Epsilon-Greedy", "Thompson-Sampling", "EXP3"), 170 | method.par = list(epsilon = 0.1, 171 | ndraws.TS = 1000, 172 | EXP3 = list(gamma = 0, eta = 0.1)), 173 | iter = 100, 174 | nburnin = 30, 175 | nperiod = 90, 176 | reward.mean.family = "Beta", 177 | reward.family = "Bernoulli", 178 | narms.family = "Binomial", 179 | npulls.family = "Log-Normal", 180 | stationary = FALSE, 181 | nonstationary.type = "Geometric Random Walk", 182 | data.par = list(reward.mean = list(shape1 = 2, shape2 = 5), 183 | npulls.family = list(meanlog = 3, sdlog = 1), 184 | narms.family = list(size = 10, prob = 0.5), 185 | nonstationary.family = list(sdlog = 0.05)), 186 | regret.plot = TRUE) 187 | res$regret.plot.object 188 | } 189 | 190 | -------------------------------------------------------------------------------- /man/SimulateMultiplePeriods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SimulateMultiplePeriods.R 3 | \name{SimulateMultiplePeriods} 4 | \alias{SimulateMultiplePeriods} 5 | \title{Simulate strategies for Multi-Armed Bandit in multiple periods} 6 | \usage{ 7 | SimulateMultiplePeriods(method = "Thompson-Sampling", 8 | method.par = list(ndraws.TS = 1000), nburnin, nperiod, reward.family, 9 | mean.reward, sd.reward = NULL, npulls.per.period = 1, 10 | weight.plot = FALSE, regret.plot = FALSE) 11 | } 12 | \arguments{ 13 | \item{method}{A character string choosing from "Epsilon-Greedy", 14 | "Epsilon-Decreasing", "Thompson-Sampling", 15 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 16 | "EXP3-Thompson-Sampling", 17 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 18 | For details of these methods, see below. 
Default is "Thompson-Sampling".} 19 | 20 | \item{method.par}{A list of parameters needed for different methods: 21 | 22 | \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 23 | "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 24 | "Greedy-Bayes-Poisson-TS". 25 | 26 | \code{ndraws.TS}: A positive integer specifying the number of random draws 27 | from the posterior; 28 | needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 29 | "EXP3-Thompson-Sampling". Default is 1000. 30 | 31 | \code{EXP3}: A list consisting of two real numbers \code{eta} and 32 | \code{gamma}; 33 | \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 34 | "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 35 | 36 | \code{BP}: A list consisting of three postive integers \code{iter.BP}, 37 | \code{ndraws.BP} and \code{interval.BP}; 38 | needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 39 | "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 40 | to compute posterior; 41 | \code{ndraws.BP} specifies the number of posterior samples 42 | drawn from posterior distribution; \code{interval.BP} is specified to draw 43 | each posterior sample from a sample sequence of length \code{interval.BP}. 44 | 45 | \code{HyperTS}: A list consisting of a vector \code{method.list}, 46 | needed for "HyperTS". \code{method.list} is a vector of character strings 47 | choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 48 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 49 | "EXP3-Thompson-Sampling", "Greedy-Bayes-Poisson-TS" and 50 | "EXP3-Bayes-Poisson-TS". 
"HyperTS" will construct an ensemble consisting all 51 | the methods in \code{method.list}.} 52 | 53 | \item{nburnin}{A positive integer specifying the number of periods to 54 | allocate each arm equal traffic before applying any strategy.} 55 | 56 | \item{nperiod}{A positive integer specifying the number of periods 57 | to apply the strategy.} 58 | 59 | \item{reward.family}{A character string specifying the distribution family 60 | of reward. Available distribution includes 61 | "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 62 | reward distribution, 63 | a vector of standard deviation should be provided in \code{sd.reward}.} 64 | 65 | \item{mean.reward}{A vector or a matrix of real numbers specifying the mean 66 | reward of each arm. If \code{mean.reward} is a vector, each element is the 67 | mean reward for each arm and the mean reward of each arm is unchanged 68 | throughout all periods (corresponding to the stationary Multi-Armed Bandit). 69 | If \code{mean.reward} is a matrix, it should 70 | have (\code{nburnin} + \code{nperiod}) rows. The mean reward of each arm 71 | could change. Each row represents a mean reward vector for each period 72 | (corresponding to nonstationary and adversarial Multi-Armed Bandit).} 73 | 74 | \item{sd.reward}{A vector of non-negative numbers specifying 75 | standard deviation of each arm's reward distribution if "Gaussian" is chosen 76 | to be the reward distribution. Default to be NULL. 77 | See \code{reward.family}.} 78 | 79 | \item{npulls.per.period}{A positive integer or a vector of positive 80 | integers. Default value is 1. If \code{npulls.per.period} is a positive 81 | integer, the number of pulls is \code{npulls.per.period} for each period. 
82 | If \code{npulls.per.period} is a vector, each element represents 83 | the number of pulls for one period; the length of \code{npulls.per.period} 84 | should be equal to \code{nburnin} + \code{nperiod}.} 85 | 86 | \item{weight.plot}{A logic value with FALSE as default. If TRUE, weight plot 87 | object for each arm is returned.} 88 | 89 | \item{regret.plot}{A logic value with FALSE as default. If TRUE, relative 90 | regret plot object is returned.} 91 | } 92 | \value{ 93 | a list consisting of: 94 | \item{weight}{A weight matrix whose each element is the allocated weight 95 | for each arm and period. Each row represents one arm and each column 96 | represents one period.} 97 | \item{regret}{A relative regret vector whose each element is relative regret 98 | for each period. For definition of relative regret, see above.} 99 | \item{weight.plot.object}{If weight.plot = TRUE, a ggplot object is returned.} 100 | \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 101 | } 102 | \description{ 103 | This function is aimed to simulate data to run 104 | strategies of Multi-Armed Bandit in a sequence of periods. Weight plot 105 | and regret plot are provided if needed. In each period there could be 106 | multiple pulls and each method can only be applied once. The default setting 107 | is that in each period there is only 1 pull, corresponding to continuous 108 | updating. 109 | } 110 | \details{ 111 | Various methods have been implemented. "Epsilon-Greedy" and 112 | "Epsilon-Decreasing" allocates \eqn{1 - epsilon} traffic to the arm which has 113 | the largest average reward and equally distribute the traffic 114 | to other arms. For "Epsilon-Greedy" epsilon in \code{method.par} serves as 115 | constant exploration rate . For "Epsilon-Decreasing" epsilon in 116 | \code{method.par} serves as exploration rate at period 1, 117 | while in period \eqn{t} exploration rate is \eqn{epsilon / t}. 
118 | See \url{https://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions} 119 | for more details about these strategies. 120 | 121 | "Thompson-Sampling" refers to Beta-Binomial Thompson Sampling using 122 | Beta(1, 1) as a prior. "Bayes-Poisson-TS" refers to Poisson-Gamma Thompson 123 | Sampling using a Bayesian Generalized Linear 124 | Mixed Effects Model to compute weights. "Bayes-Poisson-TS", 125 | "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS" depends on the package 126 | "emre" to compute posterior distribution. For algorithm 127 | details, see the paper \url{https://arxiv.org/abs/1602.00047}. 128 | 129 | UCB (Upper Confidence Bound) is a classical method for Multi-Armed Bandit. 130 | For algorithm details, see the paper 131 | \url{http://personal.unileoben.ac.at/rortner/Pubs/UCBRev.pdf}. 132 | EXP3 is a method which needs to specify exploration rate \code{gamma} and 133 | exploitation rate \code{eta}. For algorithm details, see the paper 134 | \url{https://cseweb.ucsd.edu/~yfreund/papers/bandits.pdf}. 135 | 136 | Ensemble methods are also implemented. "Greedy-Thompson-Sampling" and 137 | "Greedy-Bayes-Poisson-TS" allocate \eqn{1 - epsilon} traffic to the arm 138 | corresponding to the largest 139 | Thompson sampling weight and allocate \eqn{epsilon} traffic 140 | corresponding to Thompson sampling weights. 141 | Instead of using average reward for each period to update weights in "EXP3", 142 | "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS" use Thompson sampling 143 | weights in the updating formula in "EXP3". 144 | "HyperTS" is an ensemble by applying Thompson Sampling to selecting the best 145 | method in each period based on previous performance. For algorithm details, 146 | see the paper 147 | \url{http://yxjiang.github.io/paper/RecSys2014-ensemble-bandit.pdf}. 148 | 149 | To measure the performance. 
Regret is computed by summing over the products 150 | of the number of pulls on one arm at one period and 151 | the difference of the mean reward of that arm compared with the largest one. 152 | Relative regret is 153 | computed by dividing the regret of a certain method over the regret of the 154 | benchmark method that allocates equal weights to each arm 155 | throughout all the periods. 156 | } 157 | \examples{ 158 | ### Simulate Thompson-Sampling 159 | set.seed(100) 160 | res <- SimulateMultiplePeriods(method = "Thompson-Sampling", 161 | method.par = list(ndraws.TS = 1000), 162 | nburnin = 30, 163 | nperiod = 180, 164 | npulls.per.period = 5, 165 | reward.family = "Bernoulli", 166 | mean.reward = runif(3, 0, 0.1), 167 | weight.plot = TRUE) 168 | res$weight.plot.object 169 | ### Simulate EXP3-Thompson-Sampling 170 | set.seed(100) 171 | res <- SimulateMultiplePeriods( 172 | method = "EXP3-Thompson-Sampling", 173 | method.par = list(ndraws.TS = 1000, 174 | EXP3 = list(gamma = 0, eta = 0.1)), 175 | nburnin = 30, 176 | nperiod = 180, 177 | npulls.per.period = 5, 178 | reward.family = "Bernoulli", 179 | mean.reward = runif(3, 0, 0.1), 180 | weight.plot = TRUE) 181 | res$weight.plot.object 182 | ### Simulate ensemble method HyperTS given "Thompson-Sampling", "Epsilon-Greedy" and "Epsilon-Decreasing" 183 | set.seed(100) 184 | res <- SimulateMultiplePeriods( 185 | method = "HyperTS", 186 | method.par = list( 187 | ndraws.TS = 1000, 188 | epsilon = 0.1, 189 | HyperTS = list(method.list = c("Thompson-Sampling", 190 | "Epsilon-Greedy", 191 | "Epsilon-Decreasing"))), 192 | nburnin = 30, 193 | nperiod = 180, 194 | npulls.per.period = 5, 195 | reward.family = "Poisson", 196 | mean.reward = runif(3, 0, 0.1), 197 | weight.plot = TRUE) 198 | res$weight.plot.object 199 | } 200 | 201 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | 
Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /R/CalculateWeight.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Calculate the probability of pulling each arm in the next period for various 15 | #' strategies 16 | #' 17 | #' This function is aimed to compute the probability of pulling each arm for 18 | #' various methods in Multi-Armed Bandit given the total reward and the number 19 | #' of trials for each arm. 20 | #' 21 | #' @param method A character string choosing from "Epsilon-Greedy", 22 | #' "Epsilon-Decreasing", "Thompson-Sampling", 23 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 24 | #' "EXP3-Thompson-Sampling", 25 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 26 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 27 | #' Default is "Thompson-Sampling". 28 | #' @param method.par A list of parameters needed for different methods: 29 | #' 30 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 31 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 32 | #' "Greedy-Bayes-Poisson-TS". 33 | #' 34 | #' \code{ndraws.TS}: A positive integer specifying the number of random draws 35 | #' from the posterior; 36 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 37 | #' "EXP3-Thompson-Sampling". Default is 1000. 38 | #' 39 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and 40 | #' \code{gamma}; 41 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 42 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 
43 | #' 44 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 45 | #' \code{ndraws.BP} and \code{interval.BP}; 46 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 47 | #' "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 48 | #' to compute posterior; 49 | #' \code{ndraws.BP} specifies the number of posterior samples drawn from 50 | #' posterior distribution; \code{interval.BP} is specified to draw each 51 | #' posterior sample from 52 | #' a sample sequence of length \code{interval.BP}. 53 | #' @param all.event A data frame containing two columns \code{trial} and 54 | #' \code{reward} with the number of rows equal to the number of arms. 55 | #' Each element of \code{trial} and \code{reward} represents the number of 56 | #' trials and the total reward for each arm respectively. 57 | #' @param reward.family A character string specifying the distribution family 58 | #' of reward. Available distribution includes 59 | #' "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 60 | #' reward distribution, 61 | #' a vector of standard deviation should be provided in \code{sd.reward}. 62 | #' @param sd.reward A vector of non-negative numbers specifying standard 63 | #' deviation of each arm's reward distribution if "Gaussian" is chosen to be 64 | #' the reward distribution. Default to be NULL. 65 | #' See \code{reward.family}. 66 | #' @param period A positive integer specifying the period index. 67 | #' Default to be 1. 68 | #' @param EXP3Info A list of three vectors \code{prevWeight}, \code{EXP3Trial} 69 | #' and \code{EXP3Reward} with dimension equal to the number of arms, 70 | #' needed for "EXP3", "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS": 71 | #' 72 | #' \code{prevWeight}: the weight vector in the previous EXP3 iteration. 
73 | #' 74 | #' \code{EXP3Trial} and \code{EXP3Reward}: vectors representing 75 | #' the number of trials and the total reward for each arm in 76 | #' the previous period respectively. 77 | #' 78 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 79 | #' @return A normalized weight vector for future randomized allocation. 80 | #' @export 81 | #' @examples 82 | #' ### Calculate weights using Thompson Sampling if reward follows Poisson 83 | #' distribution. 84 | #' set.seed(100) 85 | #' CalculateWeight(method = "Thompson-Sampling", 86 | #' method.par = list(ndraws.TS = 1000), 87 | #' all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 88 | #' reward.family = "Poisson") 89 | #' ### Calculate weights using EXP3 90 | #' CalculateWeight(method = "EXP3", 91 | #' method.par = list(EXP3 = list(gamma = 0.01, eta =0.1)), 92 | #' all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 93 | #' reward.family = "Bernoulli", 94 | #' EXP3Info = list(prevWeight = rep(1, 3), 95 | #' EXP3Trial = rep(5, 3), 96 | #' EXP3Reward = 0:2)) 97 | 98 | CalculateWeight <- function(method = "Thompson-Sampling", 99 | method.par = list(ndraws.TS = 1000), 100 | all.event, 101 | reward.family, 102 | sd.reward = NULL, 103 | period = 1, 104 | EXP3Info = NULL){ 105 | method.name <- c("Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 106 | "EXP3", "UCB", "Bayes-Poisson-TS", 107 | "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling", 108 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS") 109 | if (! method %in% method.name){ 110 | stop("Please specify correct method names!") 111 | } 112 | if (! reward.family %in% c("Bernoulli", "Poisson", "Gaussian")){ 113 | stop("Please specify correct reward family!") 114 | } 115 | if (method == method.name[1]){ 116 | if (! 
is.number(method.par$epsilon) ){ 117 | stop("Please specify correct parameters for Epsilon-Greedy!") 118 | } 119 | eps <- method.par$epsilon 120 | rate <- ifelse(all.event$trial == 0, 0, all.event$reward / all.event$trial) 121 | n <- length(rate) 122 | maxIdx <- which(rate == max(rate)) 123 | maxCount <- length(maxIdx) 124 | weight <- rep(eps / (n - maxCount), n) 125 | weight[maxIdx] <- (1 - eps) / maxCount 126 | return(weight) 127 | } 128 | 129 | if (method == method.name[2]){ 130 | if (! is.number(method.par$epsilon)){ 131 | stop("Please specify correct parameters for Epsilon-Decreasing!") 132 | } 133 | eps <- method.par$epsilon / period 134 | rate <- ifelse(all.event$trial == 0, 0, all.event$reward / all.event$trial) 135 | n <- length(rate) 136 | maxIdx <- which(rate == max(rate)) 137 | maxCount <- length(maxIdx) 138 | weight <- rep(eps / (n - maxCount), n) 139 | weight[maxIdx] <- (1 - eps) / maxCount 140 | return(weight) 141 | } 142 | 143 | if (method == method.name[3] | 144 | method == method.name[7] | 145 | method == method.name[8]){ 146 | if (! 
is.number(method.par$ndraws.TS)){ 147 | stop("Please specify correct parameters for Thompson-Sampling!") 148 | } 149 | ndraws.TS <- method.par$ndraws.TS 150 | reward <- all.event$reward 151 | trial <- all.event$trial 152 | n <- length(reward) 153 | ans <- matrix(nrow = ndraws.TS, ncol = n) 154 | if (reward.family == "Bernoulli"){ 155 | failure <- trial - reward 156 | for (i in 1:n) ans[ ,i] <- rbeta(ndraws.TS, shape1 = reward[i] + 1, 157 | shape2 = failure[i] + 1) 158 | } 159 | if (reward.family == "Gaussian"){ 160 | for (i in 1:n) ans[ ,i] <- rnorm(ndraws.TS, mean = reward[i] / trial[i], 161 | sd = sd.reward[i] / sqrt(trial[i])) 162 | } 163 | if (reward.family == "Poisson"){ 164 | for (i in 1:n) ans[ ,i] <- rgamma(ndraws.TS, shape = reward[i] + 1, 165 | scale = 1 / trial[i]) 166 | } 167 | w <- table(factor(max.col(ans), levels = 1:n)) 168 | if (method == method.name[3]){ 169 | return(as.vector(w / sum(w))) 170 | } 171 | if (method == method.name[7]){ 172 | if (! is.number(method.par$epsilon)){ 173 | stop("Please specify correct parameters for Greedy-Thompson-Sampling!") 174 | } 175 | n <- length(w) 176 | maxIdx <- which(w == max(w)) 177 | maxCount <- length(maxIdx) 178 | maxVector <- rep(0, n) 179 | maxVector[maxIdx] <- 1 / maxCount 180 | eps <- method.par$epsilon 181 | return(as.vector(eps * w / sum(w) + (1 - eps) * maxVector)) 182 | } 183 | if (method == method.name[8]){ 184 | if ( ! is.number(method.par$EXP3$gamma) | 185 | ! is.number(method.par$EXP3$eta) ){ 186 | stop("Please specify correct parameters for EXP3-Thompson-Sampling!") 187 | } 188 | if (reward.family != "Bernoulli"){ 189 | stop("Please use Bernoulli Reward Family to run EXP3") 190 | } 191 | eta <- method.par$EXP3$eta 192 | gamma <- method.par$EXP3$gamma 193 | prevWeight <- EXP3Info$prevWeight 194 | temp <- prevWeight * exp(eta * w / sum(w)) 195 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / n)) 196 | } 197 | } 198 | 199 | if (method == method.name[4]){ 200 | if (! 
is.number(method.par$EXP3$gamma) | ! is.number(method.par$EXP3$eta)){ 201 | stop("Please specify correct parameters for EXP3!") 202 | } 203 | if (reward.family != "Bernoulli"){ 204 | stop("Please use Bernoulli Reward Family to run EXP3") 205 | } 206 | prevWeight <- EXP3Info$prevWeight 207 | EXP3Trial <- EXP3Info$EXP3Trial 208 | EXP3Reward <- EXP3Info$EXP3Reward 209 | eta <- method.par$EXP3$eta 210 | gamma <- method.par$EXP3$gamma 211 | EXP3Rate <- ifelse(EXP3Trial == 0, 0, EXP3Reward / EXP3Trial) 212 | temp <- prevWeight * 213 | exp(eta * (EXP3Rate - max(EXP3Rate)) * sum(EXP3Trial) / 214 | length(EXP3Trial)) 215 | result <- as.vector((1 - gamma) * temp / sum(temp) + 216 | gamma /length(EXP3Trial)) 217 | return(result) 218 | } 219 | 220 | if (method == method.name[5]){ 221 | reward <- all.event$reward 222 | trial <- all.event$trial 223 | rate <- reward / trial 224 | UCB <- rate + sqrt(2 * log(period) / trial) 225 | maxIdx <- which(UCB == max(UCB)) 226 | maxCount <- length(maxIdx) 227 | maxVector <- rep(0, length(reward)) 228 | maxVector[maxIdx] <- 1 / maxCount 229 | return(maxVector) 230 | } 231 | 232 | if (method == method.name[6] | 233 | method == method.name[9] | 234 | method == method.name[10]){ 235 | if ( ! is.number(method.par$BP$iter.BP) | 236 | ! is.number(method.par$BP$ndraws.BP) | 237 | ! 
is.number(method.par$BP$interval.BP)){ 238 | stop("Please specify correct parameters for Bayes-Poisson-TS!") 239 | } 240 | if (reward.family == "Gaussian"){ 241 | stop("Please not use Gaussian Reward Family to run Bayes-Poisson-TS!") 242 | } 243 | iter.BP <- method.par$BP$iter.BP 244 | ndraws.BP <- method.par$BP$ndraws.BP 245 | interval.BP <- method.par$BP$interval.BP 246 | n <- length(all.event$trial) 247 | temp <- all.event 248 | temp$Id <- sapply(1:n, function(x) paste("Arm", x)) 249 | mdl <- SetupEMREoptim( 250 | "reward ~ 1 + (1|Id) + offset(trial)", 251 | data = temp, model.constructor = PoissonEMRE, 252 | burnin = iter.BP - ndraws.BP * interval.BP, 253 | thinning.interval = 1, llik.interval = 1) 254 | mdlResult <- FitEMRE(mdl, max.iter = iter.BP, debug = FALSE) 255 | posterior <- mdlResult$snapshots 256 | useIdx <- seq(1, ndraws.BP * interval.BP, interval.BP) 257 | armPos <- posterior[["1__Id"]][useIdx, ] 258 | biasPos <- posterior[["__bias__"]][useIdx, ] 259 | winnerPred <- sapply(1:length(biasPos), function(k) { 260 | biasK <- biasPos[k] 261 | armK <- armPos[k, ] 262 | predK <- biasK * armK 263 | return(which(predK == max(predK))[1]) 264 | }) 265 | w <- table(factor(winnerPred, levels = 1:n)) 266 | 267 | if (method == method.name[6]){ 268 | return(as.vector(w / sum(w))) 269 | } 270 | 271 | if (method == method.name[9]){ 272 | if ( ! is.number(method.par$epsilon)){ 273 | stop("Please specify correct parameters for Greedy-Bayes-Poisson-TS!") 274 | } 275 | n <- length(w) 276 | maxIdx <- which(w == max(w)) 277 | maxCount <- length(maxIdx) 278 | maxVector <- rep(0, n) 279 | maxVector[maxIdx] <- 1 / maxCount 280 | eps <- method.par$epsilon 281 | return(as.vector(eps * w / sum(w) + (1 - eps) * maxVector)) 282 | } 283 | 284 | if (method == method.name[10]){ 285 | if ( ! is.number(method.par$EXP3$gamma) | 286 | ! 
is.number(method.par$EXP3$eta) ){ 287 | stop("Please specify correct parameters for EXP3-Bayes-Poisson-TS!") 288 | } 289 | if (reward.family != "Bernoulli"){ 290 | stop("Please use Bernoulli Reward Family to run Bayes-Poisson-TS!") 291 | } 292 | eta <- method.par$EXP3$eta 293 | gamma <- method.par$EXP3$gamma 294 | prevWeight <- EXP3Info$prevWeight 295 | temp <- prevWeight * exp(eta * w / sum(w)) 296 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / n)) 297 | } 298 | } 299 | } 300 | -------------------------------------------------------------------------------- /R/CalculateWeight.R~: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Calculate the probability of pulling each arm in the next period for various 15 | #' strategies 16 | #' 17 | #' This function is aimed to compute the probability of pulling each arm for 18 | #' various methods in Multi-Armed Bandit given the total reward and the number 19 | #' of trials for each arm. 20 | #' 21 | #' @param method A character string choosing from "Epsilon-Greedy", 22 | #' "Epsilon-Decreasing", "Thompson-Sampling", 23 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 24 | #' "EXP3-Thompson-Sampling", 25 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 26 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 
27 | #' Default is "Thompson-Sampling". 28 | #' @param method.par A list of parameters needed for different methods: 29 | #' 30 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 31 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and "Greedy-Bayes-Poisson-TS". 32 | #' 33 | #' \code{ndraws.TS}: A positive integer specifying the number of random draws 34 | #' from the posterior; 35 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 36 | #' "EXP3-Thompson-Sampling". Default is 1000. 37 | #' 38 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and \code{gamma}; 39 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 40 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 41 | #' 42 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 43 | #' \code{ndraws.BP} and \code{interval.BP}; 44 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 45 | #' "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 46 | #' to compute posterior; 47 | #' \code{ndraws.BP} specifies the number of posterior samples drawn from 48 | #' posterior distribution; \code{interval.BP} is specified to draw each 49 | #' posterior sample from 50 | #' a sample sequence of length \code{interval.BP}. 51 | #' @param all.event A data frame containing two columns \code{trial} and 52 | #' \code{reward} with the number of rows equal to the number of arms. 53 | #' Each element of \code{trial} and \code{reward} represents the number of trials 54 | #' and the total reward for each arm respectively. 55 | #' @param reward.family A character string specifying the distribution family 56 | #' of reward. Available distribution includes 57 | #' "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 58 | #' reward distribution, 59 | #' a vector of standard deviation should be provided in \code{sd.reward}. 
60 | #' @param sd.reward A vector of non-negative numbers specifying standard 61 | #' deviation of each arm's reward distribution if "Gaussian" is chosen to be 62 | #' the reward distribution. Default to be NULL. 63 | #' See \code{reward.family}. 64 | #' @param period A positive integer specifying the period index. Default to be 1. 65 | #' @param EXP3Info A list of three vectors \code{prevWeight}, \code{EXP3Trial} 66 | #' and \code{EXP3Reward} with dimension equal to the number of arms, 67 | #' needed for "EXP3", "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS": 68 | #' 69 | #' \code{prevWeight}: the weight vector in the previous EXP3 iteration. 70 | #' 71 | #' \code{EXP3Trial} and \code{EXP3Reward}: vectors representing 72 | #' the number of trials and the total reward for each arm in 73 | #' the previous period respectively. 74 | #' 75 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 76 | #' @return A normalized weight vector for future randomized allocation. 77 | #' @export 78 | #' @examples 79 | #' ### Calculate weights using Thompson Sampling if reward follows Poisson distribution 80 | #' set.seed(100) 81 | #' CalculateWeight(method = "Thompson-Sampling", 82 | #' method.par = list(ndraws.TS = 1000), 83 | #' all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 84 | #' reward.family = "Poisson") 85 | #' ### Calculate weights using EXP3 86 | #' CalculateWeight(method = "EXP3", 87 | #' method.par = list(EXP3 = list(gamma = 0.01, eta =0.1)), 88 | #' all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 89 | #' reward.family = "Bernoulli", 90 | #' EXP3Info = list(prevWeight = rep(1, 3), EXP3Trial = rep(5, 3), EXP3Reward = 0:2)) 91 | 92 | 93 | 94 | CalculateWeight <- function(method = "Thompson-Sampling", 95 | method.par = list(ndraws.TS = 1000), 96 | all.event, 97 | reward.family, 98 | sd.reward = NULL, 99 | period = 1, 100 | EXP3Info = NULL){ 101 | method.name <- c("Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 102 | 
"EXP3", "UCB", "Bayes-Poisson-TS", 103 | "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling", 104 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS") 105 | if (! method %in% method.name){ 106 | stop("Please specify correct method names!") 107 | } 108 | if (! reward.family %in% c("Bernoulli", "Poisson", "Gaussian")){ 109 | stop("Please specify correct reward family!") 110 | } 111 | 112 | 113 | if (method == method.name[1]){ 114 | if (! is.number(method.par$epsilon) ){ 115 | stop("Please specify correct parameters for Epsilon-Greedy!") 116 | } 117 | eps <- method.par$epsilon 118 | rate <- ifelse(all.event$trial == 0, 0, all.event$reward / all.event$trial) 119 | n <- length(rate) 120 | maxIdx <- which(rate == max(rate)) 121 | maxCount <- length(maxIdx) 122 | weight <- rep(eps / (n - maxCount), n) 123 | weight[maxIdx] <- (1 - eps) / maxCount 124 | return(weight) 125 | } 126 | 127 | 128 | if (method == method.name[2]){ 129 | if ( ! is.number(method.par$epsilon)){ 130 | stop("Please specify correct parameters for Epsilon-Decreasing!") 131 | } 132 | eps <- method.par$epsilon / period 133 | rate <- ifelse(all.event$trial == 0, 0, all.event$reward / all.event$trial) 134 | n <- length(rate) 135 | maxIdx <- which(rate == max(rate)) 136 | maxCount <- length(maxIdx) 137 | weight <- rep(eps / (n - maxCount), n) 138 | weight[maxIdx] <- (1 - eps) / maxCount 139 | return(weight) 140 | } 141 | 142 | 143 | if (method == method.name[3] | 144 | method == method.name[7] | 145 | method == method.name[8]){ 146 | if (! 
is.number(method.par$ndraws.TS)){ 147 | stop("Please specify correct parameters for Thompson-Sampling!") 148 | } 149 | ndraws.TS <- method.par$ndraws.TS 150 | reward <- all.event$reward 151 | trial <- all.event$trial 152 | n <- length(reward) 153 | ans <- matrix(nrow = ndraws.TS, ncol = n) 154 | if (reward.family == "Bernoulli"){ 155 | failure <- trial - reward 156 | for (i in 1:n) ans[ ,i] <- rbeta(ndraws.TS, shape1 = reward[i] + 1, 157 | shape2 = failure[i] + 1) 158 | } 159 | if (reward.family == "Gaussian"){ 160 | for (i in 1:n) ans[ ,i] <- rnorm(ndraws.TS, mean = reward[i] / trial[i], 161 | sd = sd.reward[i] / sqrt(trial[i])) 162 | } 163 | if (reward.family == "Poisson"){ 164 | for (i in 1:n) ans[ ,i] <- rgamma(ndraws.TS, shape = reward[i] + 1, 165 | scale = 1 / trial[i]) 166 | } 167 | 168 | w <- table(factor(max.col(ans), levels = 1:n)) 169 | 170 | 171 | if (method == method.name[3]){ 172 | return(as.vector(w / sum(w))) 173 | } 174 | 175 | if (method == method.name[7]){ 176 | if (! is.number(method.par$epsilon)){ 177 | stop("Please specify correct parameters for Greedy-Thompson-Sampling!") 178 | } 179 | n <- length(w) 180 | maxIdx <- which(w == max(w)) 181 | maxCount <- length(maxIdx) 182 | maxVector <- rep(0, n) 183 | maxVector[maxIdx] <- 1 / maxCount 184 | eps <- method.par$epsilon 185 | return(as.vector(eps * w / sum(w) + (1 - eps) * maxVector)) 186 | } 187 | 188 | if (method == method.name[8]){ 189 | if ( ! is.number(method.par$EXP3$gamma) | 190 | ! 
is.number(method.par$EXP3$eta) ){ 191 | stop("Please specify correct parameters for EXP3-Thompson-Sampling!") 192 | } 193 | if (reward.family != "Bernoulli"){ 194 | stop("Please use Bernoulli Reward Family to run EXP3") 195 | } 196 | 197 | eta <- method.par$EXP3$eta 198 | gamma <- method.par$EXP3$gamma 199 | prevWeight <- EXP3Info$prevWeight 200 | temp <- prevWeight * exp(eta * w / sum(w)) 201 | 202 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / n)) 203 | } 204 | } 205 | 206 | 207 | if (method == method.name[4]){ 208 | if (! is.number(method.par$EXP3$gamma) | ! is.number(method.par$EXP3$eta)){ 209 | stop("Please specify correct parameters for EXP3!") 210 | } 211 | if (reward.family != "Bernoulli"){ 212 | stop("Please use Bernoulli Reward Family to run EXP3") 213 | } 214 | prevWeight <- EXP3Info$prevWeight 215 | EXP3Trial <- EXP3Info$EXP3Trial 216 | EXP3Reward <- EXP3Info$EXP3Reward 217 | eta <- method.par$EXP3$eta 218 | gamma <- method.par$EXP3$gamma 219 | 220 | EXP3Rate <- ifelse(EXP3Trial == 0, 0, EXP3Reward / EXP3Trial) 221 | temp <- prevWeight * 222 | exp(eta * (EXP3Rate - max(EXP3Rate)) * sum(EXP3Trial) / length(EXP3Trial)) 223 | 224 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / length(EXP3Trial))) 225 | } 226 | 227 | 228 | if (method == method.name[5]){ 229 | reward <- all.event$reward 230 | trial <- all.event$trial 231 | rate <- reward / trial 232 | UCB <- rate + sqrt(2 * log(period) / trial) 233 | maxIdx <- which(UCB == max(UCB)) 234 | maxCount <- length(maxIdx) 235 | maxVector <- rep(0, length(reward)) 236 | maxVector[maxIdx] <- 1 / maxCount 237 | return(maxVector) 238 | } 239 | 240 | if (method == method.name[6] | 241 | method == method.name[9] | 242 | method == method.name[10]){ 243 | if ( ! is.number(method.par$BP$iter.BP) | 244 | ! is.number(method.par$BP$ndraws.BP) | 245 | ! 
is.number(method.par$BP$interval.BP)){ 246 | stop("Please specify correct parameters for Bayes-Poisson-TS!") 247 | } 248 | if (reward.family == "Gaussian"){ 249 | stop("Please not use Gaussian Reward Family to run Bayes-Poisson-TS!") 250 | } 251 | iter.BP <- method.par$BP$iter.BP 252 | ndraws.BP <- method.par$BP$ndraws.BP 253 | interval.BP <- method.par$BP$interval.BP 254 | 255 | n <- length(all.event$trial) 256 | temp <- all.event 257 | temp$Id <- sapply(1:n, function(x) paste("Arm", x)) 258 | 259 | mdl <- SetupEMREoptim( 260 | "reward ~ 1 + (1|Id) + offset(trial)", 261 | data = temp, model.constructor = PoissonEMRE, 262 | burnin = iter.BP - ndraws.BP * interval.BP, 263 | thinning.interval = 1, llik.interval = 1) 264 | mdlResult <- FitEMRE(mdl, max.iter = iter.BP, debug = FALSE) 265 | posterior <- mdlResult$snapshots 266 | useIdx <- seq(1, ndraws.BP * interval.BP, interval.BP) 267 | armPos <- posterior[["1__Id"]][useIdx, ] 268 | biasPos <- posterior[["__bias__"]][useIdx, ] 269 | winnerPred <- sapply(1:length(biasPos), function(k) { 270 | biasK <- biasPos[k] 271 | armK <- armPos[k, ] 272 | predK <- biasK * armK 273 | return(which(predK == max(predK))[1]) 274 | }) 275 | w <- table(factor(winnerPred, levels = 1:n)) 276 | 277 | if (method == method.name[6]){ 278 | return(as.vector(w / sum(w))) 279 | } 280 | 281 | if (method == method.name[9]){ 282 | if ( ! is.number(method.par$epsilon)){ 283 | stop("Please specify correct parameters for Greedy-Bayes-Poisson-TS!") 284 | } 285 | n <- length(w) 286 | maxIdx <- which(w == max(w)) 287 | maxCount <- length(maxIdx) 288 | maxVector <- rep(0, n) 289 | maxVector[maxIdx] <- 1 / maxCount 290 | eps <- method.par$epsilon 291 | return(as.vector(eps * w / sum(w) + (1 - eps) * maxVector)) 292 | } 293 | 294 | if (method == method.name[10]){ 295 | if ( ! is.number(method.par$EXP3$gamma) | 296 | ! 
is.number(method.par$EXP3$eta) ){ 297 | stop("Please specify correct parameters for EXP3-Bayes-Poisson-TS!") 298 | } 299 | if (reward.family != "Bernoulli"){ 300 | stop("Please use Bernoulli Reward Family to run Bayes-Poisson-TS!") 301 | } 302 | 303 | eta <- method.par$EXP3$eta 304 | gamma <- method.par$EXP3$gamma 305 | prevWeight <- EXP3Info$prevWeight 306 | temp <- prevWeight * exp(eta * w / sum(w)) 307 | 308 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / n)) 309 | } 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /R/SimulateMultipleMethods.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Compare various strategies for Multi-Armed Bandit in stationary 15 | #' and non-stationary scenarios 16 | #' 17 | #' This function is aimed to simulate data in different scenarios to 18 | #' compare various strategies in Multi-Armed Bandit. 19 | #' Users can specify the distribution of the number of arms, 20 | #' the distribution of mean reward, the distribution of the number of pulls 21 | #' in one period and the stationariness to simulate different scenarios. 22 | #' Relative regret is returned and average relative regret plot is returned 23 | #' if needed. 24 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 
25 | #' @param method A vector of character strings choosing from "Epsilon-Greedy", 26 | #' "Epsilon-Decreasing", "Thompson-Sampling", 27 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 28 | #' "EXP3-Thompson-Sampling", 29 | #' "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 30 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 31 | #' Default is "Thompson-Sampling". 32 | #' @param method.par A list of parameters needed for different methods: 33 | #' 34 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 35 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 36 | #' "Greedy-Bayes-Poisson-TS". 37 | #' 38 | #' \code{ndraws.TS}: A positive integer specifying 39 | #' the number of random draws from the posterior; 40 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" 41 | #' and "EXP3-Thompson-Sampling". Default is 1000. 42 | #' 43 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and 44 | #' \code{gamma}; 45 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 46 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 47 | #' 48 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 49 | #' \code{ndraws.BP} and \code{interval.BP}; 50 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" 51 | #' and "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number 52 | #' of iterations to compute posterior; 53 | #' \code{ndraws.BP} specifies the number of posterior samples drawn 54 | #' from posterior distribution; \code{interval.BP} is specified to 55 | #' draw each posterior sample from 56 | #' a sample sequence of length \code{interval.BP}. 57 | #' 58 | #' \code{HyperTS}: A list consisting of a vector \code{method.list}, 59 | #' needed for "HyperTS". 
\code{method.list} is a vector of character strings 60 | #' choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 61 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 62 | #' "EXP3-Thompson-Sampling", 63 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 64 | #' "HyperTS" will construct an ensemble consisting all the methods 65 | #' in \code{method.list}. 66 | #' @param iter A positive integer specifying the number of iterations. 67 | #' @param nburnin A positive integer specifying the number of periods 68 | #' to allocate each arm equal traffic before applying any strategy. 69 | #' @param nperiod A positive integer specifying the number of periods 70 | #' to apply various strategies. 71 | #' 72 | #' @param reward.mean.family A character string specifying 73 | #' the distribution family to generate mean reward of each arm. 74 | #' Available distribution includes "Uniform", "Beta" and "Gaussian". 75 | #' @param reward.family A character string specifying the distribution family 76 | #' of reward. Available distribution includes 77 | #' "Bernoulli", "Poisson" and "Gaussian". 78 | #' If "Gaussian" is chosen to be the reward distribution, 79 | #' a vector of standard deviation should be provided in 80 | #' \code{sd.reward} in \code{data.par}. 81 | #' @param narms.family A character string specifying the distribution family 82 | #' of the number of arms. Available distribution includes "Poisson" and 83 | #' "Binomial". 84 | #' @param npulls.family A character string specifying the distribution family 85 | #' of the number of pulls per period. 86 | #' For continuous distribution, the number of pulls will be rounded up. 87 | #' Available distribution includes "Log-Normal" and "Poisson". 88 | #' @param stationary A logic value indicating whether a stationary 89 | #' Multi-Armed Bandit is considered (corresponding to the case that 90 | #' the reward mean is unchanged). Default to be TRUE. 
91 | #' @param nonstationary.type A character string indicating 92 | #' how the mean reward varies. Available types include "Random Walk" and 93 | #' "Geometric Random Walk" 94 | #' (reward mean follows random walk in the log scale). Default to be NULL. 95 | #' @param data.par A list of data generating parameters: 96 | #' 97 | #' \code{reward.mean}: A list of parameters of \code{reward.mean.family}: 98 | #' \code{min} and \code{max} are two real numbers specifying 99 | #' the bounds when \eqn{reward.mean.family = "Uniform"}; \code{shape1} and 100 | #' \code{shape2} are two shape parameters when 101 | #' \eqn{reward.mean.family = "Beta"}; 102 | #' \code{mean} and \code{sd} specify mean and standard deviation 103 | #' when \eqn{reward.mean.family = "Gaussian"}. 104 | #' 105 | #' \code{reward.family}: A list of parameters of \code{reward.family}: 106 | #' \code{sd} is a vector of non-negative numbers specifying standard deviation 107 | #' of each arm's reward distribution 108 | #' if "Gaussian" is chosen to be the reward distribution. 109 | #' 110 | #' \code{narms.family}: A list of parameters of \code{narms.family}: 111 | #' \code{lambda} is a positive parameter specifying the mean when 112 | #' \eqn{narms.family = "Poisson"}; \code{size} and \code{prob} 113 | #' are 2 parameters needed when \eqn{narms.family = "Binomial"}. 114 | #' 115 | #' \code{npulls.family}: A list of parameters of \code{npulls.family}: 116 | #' \code{meanlog} and \code{sdlog} are 2 positive parameters specifying the mean 117 | #' and standard deviation in the log scale 118 | #' when \eqn{npulls.family = "Log-Normal"}; 119 | #' \code{lambda} is a positive parameter 120 | #' specifying the mean when \eqn{npulls.family = "Poisson"}. 
121 | #' 122 | #' \code{nonstationary.family}: 123 | #' A list of parameters of \code{nonstationary.type}: 124 | #' \code{sd} is a positive parameter specifying the standard deviation 125 | #' of white noise 126 | #' when \eqn{nonstationary.type = "Random Walk"}; \code{sdlog} is 127 | #' a positive parameter specifying the log standard deviation of white noise 128 | #' when \eqn{nonstationary.type = "Geometric Random Walk"}. 129 | #' @param regret.plot A logic value indicating whether an average regret plot 130 | #' is returned. Default to be FALSE. 131 | #' @return a list consisting of: 132 | #' \item{regret.matrix}{A three-dimensional array with each dimension corresponding to the period, iteration and method.} 133 | #' \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 134 | #' @export 135 | #' @examples 136 | #' ### Compare Epsilon-Greedy and Thompson Sampling in the stationary case. 137 | #' set.seed(100) 138 | #' res <- SimulateMultipleMethods( 139 | #' method = c("Epsilon-Greedy", "Thompson-Sampling"), 140 | #' method.par = list(epsilon = 0.1, ndraws.TS = 1000), 141 | #' iter = 100, 142 | #' nburnin = 30, 143 | #' nperiod = 180, 144 | #' reward.mean.family = "Uniform", 145 | #' reward.family = "Bernoulli", 146 | #' narms.family = "Poisson", 147 | #' npulls.family = "Log-Normal", 148 | #' data.par = list(reward.mean = list(min = 0, max = 0.1), 149 | #' npulls.family = list(meanlog = 3, sdlog = 1.5), 150 | #' narms.family = list(lambda = 5)), 151 | #' regret.plot = TRUE) 152 | #' res$regret.plot.object 153 | #' ### Compare Epsilon-Greedy, Thompson Sampling and EXP3 in the non-stationary case. 
#' set.seed(100)
#' res <- SimulateMultipleMethods(
#'   method = c("Epsilon-Greedy", "Thompson-Sampling", "EXP3"),
#'   method.par = list(epsilon = 0.1,
#'                     ndraws.TS = 1000,
#'                     EXP3 = list(gamma = 0, eta = 0.1)),
#'   iter = 100,
#'   nburnin = 30,
#'   nperiod = 90,
#'   reward.mean.family = "Beta",
#'   reward.family = "Bernoulli",
#'   narms.family = "Binomial",
#'   npulls.family = "Log-Normal",
#'   stationary = FALSE,
#'   nonstationary.type = "Geometric Random Walk",
#'   data.par = list(reward.mean = list(shape1 = 2, shape2 = 5),
#'                   npulls.family = list(meanlog = 3, sdlog = 1),
#'                   narms.family = list(size = 10, prob = 0.5),
#'                   nonstationary.family = list(sdlog = 0.05)),
#'   regret.plot = TRUE)
#' res$regret.plot.object

SimulateMultipleMethods <- function(method = "Thompson-Sampling",
                                    method.par = list(ndraws.TS = 1000),
                                    iter,
                                    nburnin,
                                    nperiod,
                                    reward.mean.family,
                                    reward.family,
                                    narms.family,
                                    npulls.family,
                                    stationary = TRUE,
                                    nonstationary.type = NULL,
                                    data.par,
                                    regret.plot = FALSE){
  # --- Validate user-supplied names early so failures are informative. ---
  if (! all(method %in% c("Epsilon-Greedy", "Epsilon-Decreasing",
                          "Thompson-Sampling","EXP3", "UCB", "Bayes-Poisson-TS",
                          "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling",
                          "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS",
                          "HyperTS"))){
    stop("Please specify correct method names!")
  }
  if (! reward.family %in% c("Bernoulli", "Poisson", "Gaussian")){
    stop("Please specify correct reward family!")
  }
  if (! reward.mean.family %in% c("Uniform", "Beta", "Gaussian")){
    stop("Please specify correct mean reward family!")
  }
  if (! narms.family %in% c("Binomial", "Poisson")){
    stop("Please specify correct distribution family for the number of arms!")
  }
  if (! npulls.family %in% c("Log-Normal", "Poisson")){
    stop("Please specify correct distribution family for the number of pulls!")
  }
  # Robustness fix: previously a non-stationary run with a NULL or unknown
  # nonstationary.type crashed later with an uninformative
  # "argument is of length zero" error inside `if`.
  if (! stationary &&
      (is.null(nonstationary.type) ||
       ! nonstationary.type %in% c("Random Walk", "Geometric Random Walk"))){
    stop("Please specify correct nonstationary type!")
  }

  nmethod <- length(method)
  ntotal <- nburnin + nperiod  # total number of simulated periods
  # Dimensions: period x iteration x method.
  regret.matrix <- array(0, c(nperiod, iter, nmethod))
  for (i in seq_len(iter)){
    # --- Draw the number of arms; rejection-sample until at least 2 arms. ---
    if (narms.family == "Poisson"){
      lambda <- data.par$narms.family$lambda
      repeat {
        narms <- rpois(1, lambda)
        if (narms > 1) break
      }
    }
    if (narms.family == "Binomial"){
      size <- data.par$narms.family$size
      prob <- data.par$narms.family$prob
      repeat {
        narms <- rbinom(1, size, prob)
        if (narms > 1) break
      }
    }

    # --- Draw each arm's mean reward from the requested family. ---
    if (reward.mean.family == "Uniform"){
      mean.reward <- runif(narms, min = data.par$reward.mean$min,
                           max = data.par$reward.mean$max)
    }
    if (reward.mean.family == "Beta"){
      mean.reward <- rbeta(narms, shape1 = data.par$reward.mean$shape1,
                           shape2 = data.par$reward.mean$shape2)
    }
    if (reward.mean.family == "Gaussian"){
      if (reward.family == "Bernoulli" || reward.family == "Poisson"){
        stop("Please not use Gaussian if reward family is Bernoulli or Poisson!")
      }
      mean.reward <- rnorm(narms, mean = data.par$reward.mean$mean,
                           sd = data.par$reward.mean$sd)
    }

    # --- Non-stationary case: expand the scalar means into a
    # period-by-arm matrix following the requested random-walk dynamics. ---
    if (! stationary){
      mean.reward.matrix <- matrix(0, nrow = ntotal, ncol = narms)
      mean.reward.matrix[1, ] <- mean.reward
      if (nonstationary.type == "Geometric Random Walk"){
        for (j in 2:ntotal){
          step <- mean.reward.matrix[j - 1, ] *
            rlnorm(narms, meanlog = 0,
                   sdlog = data.par$nonstationary.family$sdlog)
          if (reward.family == "Bernoulli"){
            # Bernoulli means must stay within [0, 1]; pmin() is the
            # vectorized equivalent of the old sapply(min) loop.
            step <- pmin(1, step)
          }
          mean.reward.matrix[j, ] <- step
        }
        mean.reward <- mean.reward.matrix
      }
      if (nonstationary.type == "Random Walk"){
        if (reward.family == "Bernoulli" || reward.family == "Poisson"){
          stop("Please not use Random Walk to generate mean reward if reward family is Bernoulli or Poisson!")
        }
        for (j in 2:ntotal){
          mean.reward.matrix[j, ] <- mean.reward.matrix[j - 1, ] +
            rnorm(narms, mean = 0, sd = data.par$nonstationary.family$sd)
        }
        mean.reward <- mean.reward.matrix
      }
    }

    # --- Draw the (strictly positive) number of pulls for every period. ---
    if (npulls.family == "Log-Normal"){
      npulls.per.period <- ceiling(
        rlnorm(ntotal,
               meanlog = data.par$npulls.family$meanlog,
               sdlog = data.par$npulls.family$sdlog))
    }
    if (npulls.family == "Poisson"){
      npulls.per.period <- rep(0, ntotal)
      for (num in seq_len(ntotal)){
        repeat {
          npulls.per.period[num] <-
            rpois(1, lambda = data.par$npulls.family$lambda)
          if (npulls.per.period[num] > 0) break
        }
      }
    }

    # --- Run every requested strategy on the same simulated data so the
    # methods are compared on identical draws. ---
    for (k in seq_along(method)){
      regret <- SimulateMultiplePeriods(method = method[k],
                                        nburnin = nburnin,
                                        nperiod = nperiod,
                                        mean.reward = mean.reward,
                                        reward.family = reward.family,
                                        sd.reward = data.par$reward.family$sd,
                                        npulls.per.period = npulls.per.period,
                                        method.par = method.par)$regret
      regret.matrix[, i, k] <- regret
    }
  }

  if (isTRUE(regret.plot)){
    # Average over iterations, then reshape to long form for ggplot.
    relativeRegret <- c(apply(regret.matrix, c(1, 3), mean))
    methodName <- rep(method, each = nperiod)
    daySeq <- rep(seq_len(nperiod), nmethod)
    graphData <- data.frame(daySeq, relativeRegret, methodName)
    regret.plot.object <- ggplot(graphData,
                                 aes(x = daySeq,
                                     y = relativeRegret,
                                     colour = factor(methodName))) +
      geom_line() + ggtitle("Regret Plot") +
      theme(legend.text = element_text(size = 12, face = "bold"),
            axis.text.y = element_text(size = 12),
            legend.title = element_blank()) +
      labs(y = "Relative Regret", x = "day")
    return(list(regret.matrix = regret.matrix,
                regret.plot.object = regret.plot.object))
  } else {
    return(list(regret.matrix = regret.matrix))
  }
}
332 | -------------------------------------------------------------------------------- /R/SimulateMultipleMethods.R~: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Compare various strategies for Multi-Armed Bandit in stationary 15 | #' and non-stationary scenarios 16 | #' 17 | #' This function is aimed to simulate data in different scenarios to 18 | #' compare various strategies in Multi-Armed Bandit. 19 | #' Users can specify the distribution of the number of arms, 20 | #' the distribution of mean reward, the distribution of the number of pulls 21 | #' in one period and the stationariness to simulate different scenarios. 22 | #' Relative regret is returned and average relative regret plot is returned if needed. 23 | #' See \code{\link{SimulateMultiplePeriods}} for more details.
24 | #' @param method A vector of character strings choosing from "Epsilon-Greedy", 25 | #' "Epsilon-Decreasing", "Thompson-Sampling", 26 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 27 | #' "EXP3-Thompson-Sampling", 28 | #' "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 29 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 30 | #' Default is "Thompson-Sampling". 31 | #' @param method.par A list of parameters needed for different methods: 32 | #' 33 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 34 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and "Greedy-Bayes-Poisson-TS". 35 | #' 36 | #' \code{ndraws.TS}: A positive integer specifying 37 | #' the number of random draws from the posterior; 38 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" 39 | #' and "EXP3-Thompson-Sampling". Default is 1000. 40 | #' 41 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and \code{gamma}; 42 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 43 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 44 | #' 45 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 46 | #' \code{ndraws.BP} and \code{interval.BP}; 47 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" 48 | #' and "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 49 | #' to compute posterior; 50 | #' \code{ndraws.BP} specifies the number of posterior samples drawn 51 | #' from posterior distribution; \code{interval.BP} is specified to 52 | #' draw each posterior sample from 53 | #' a sample sequence of length \code{interval.BP}. 54 | #' 55 | #' \code{HyperTS}: A list consisting of a vector \code{method.list}, 56 | #' needed for "HyperTS". 
\code{method.list} is a vector of character strings 57 | #' choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 58 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 59 | #' "EXP3-Thompson-Sampling", 60 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 61 | #' "HyperTS" will construct an ensemble consisting all the methods 62 | #' in \code{method.list}. 63 | #' @param iter A positive integer specifying the number of iterations. 64 | #' @param nburnin A positive integer specifying the number of periods 65 | #' to allocate each arm equal traffic before applying any strategy. 66 | #' @param nperiod A positive integer specifying the number of periods 67 | #' to apply various strategies. 68 | #' 69 | #' @param reward.mean.family A character string specifying 70 | #' the distribution family to generate mean reward of each arm. 71 | #' Available distribution includes "Uniform", "Beta" and "Gaussian". 72 | #' @param reward.family A character string specifying the distribution family 73 | #' of reward. Available distribution includes 74 | #' "Bernoulli", "Poisson" and "Gaussian". 75 | #' If "Gaussian" is chosen to be the reward distribution, 76 | #' a vector of standard deviation should be provided in 77 | #' \code{sd.reward} in \code{data.par}. 78 | #' @param narms.family A character string specifying the distribution family 79 | #' of the number of arms. Available distribution includes "Poisson" and "Binomial". 80 | #' @param npulls.family A character string specifying the distribution family 81 | #' of the number of pulls per period. 82 | #' For continuous distribution, the number of pulls will be rounded up. 83 | #' Available distribution includes "Log-Normal" and "Poisson". 84 | #' @param stationary A logic value indicating whether a stationary 85 | #' Multi-Armed Bandit is considered (corresponding to the case that 86 | #' the reward mean is unchanged). Default to be TRUE. 
87 | #' @param nonstationary.type A character string indicating 88 | #' how the mean reward varies. Available types include "Random Walk" and 89 | #' "Geometric Random Walk" 90 | #' (reward mean follows random walk in the log scale). Default to be NULL. 91 | #' @param data.par A list of data generating parameters: 92 | #' 93 | #' \code{reward.mean}: A list of parameters of \code{reward.mean.family}: 94 | #' \code{min} and \code{max} are two real numbers specifying 95 | #' the bounds when \eqn{reward.mean.family = "Uniform"}; \code{shape1} and 96 | #' \code{shape2} are two shape parameters when \eqn{reward.mean.family = "Beta"}; 97 | #' \code{mean} and \code{sd} specify mean and standard deviation 98 | #' when \eqn{reward.mean.family = "Gaussian"}. 99 | #' 100 | #' \code{reward.family}: A list of parameters of \code{reward.family}: 101 | #' \code{sd} is a vector of non-negative numbers specifying standard deviation 102 | #' of each arm's reward distribution 103 | #' if "Gaussian" is chosen to be the reward distribution. 104 | #' 105 | #' \code{narms.family}: A list of parameters of \code{narms.family}: 106 | #' \code{lambda} is a positive parameter specifying the mean when 107 | #' \eqn{narms.family = "Poisson"}; \code{size} and \code{prob} 108 | #' are 2 parameters needed when \eqn{narms.family = "Binomial"}. 109 | #' 110 | #' \code{npulls.family}: A list of parameters of \code{npulls.family}: 111 | #' \code{meanlog} and \code{sdlog} are 2 positive parameters specifying the mean 112 | #' and standard deviation in the log scale 113 | #' when \eqn{npulls.family = "Log-Normal"}; \code{lambda} is a positive parameter 114 | #' specifying the mean when \eqn{npulls.family = "Poisson"}. 
115 | #' 116 | #' 117 | #' \code{nonstationary.family}: A list of parameters of \code{nonstationary.type}: 118 | #' \code{sd} is a positive parameter specifying the standard deviation 119 | #' of white noise 120 | #' when \eqn{nonstationary.type = "Random Walk"}; \code{sdlog} is 121 | #' a positive parameter specifying the log standard deviation of white noise 122 | #' when \eqn{nonstationary.type = "Geometric Random Walk"}. 123 | #' @param regret.plot A logic value indicating whether an average regret plot 124 | #' is returned. Default to be FALSE. 125 | #' @return a list consisting of: 126 | #' \item{regret.matrix}{A three-dimensional array with each dimension corresponding to the period, iteration and method.} 127 | #' \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 128 | #' @export 129 | #' @examples 130 | #' ### Compare Epsilon-Greedy and Thompson Sampling in the stationary case. 131 | #' set.seed(100) 132 | #' res <- SimulateMultipleMethods(method = c("Epsilon-Greedy", "Thompson-Sampling"), 133 | #' method.par = list(epsilon = 0.1, ndraws.TS = 1000), 134 | #' iter = 100, 135 | #' nburnin = 30, 136 | #' nperiod = 180, 137 | #' reward.mean.family = "Uniform", 138 | #' reward.family = "Bernoulli", 139 | #' narms.family = "Poisson", 140 | #' npulls.family = "Log-Normal", 141 | #' data.par = list(reward.mean = list(min = 0, max = 0.1), 142 | #' npulls.family = list(meanlog = 3, sdlog = 1.5), 143 | #' narms.family = list(lambda = 5)), 144 | #' regret.plot = TRUE) 145 | #' res$regret.plot.object 146 | #' ### Compare Epsilon-Greedy, Thompson Sampling and EXP3 in the non-stationary case. 
147 | #' set.seed(100) 148 | #' res <- SimulateMultipleMethods(method = c("Epsilon-Greedy", "Thompson-Sampling", "EXP3"), 149 | #' method.par = list(epsilon = 0.1, 150 | #' ndraws.TS = 1000, 151 | #' EXP3 = list(gamma = 0, eta = 0.1)), 152 | #' iter = 100, 153 | #' nburnin = 30, 154 | #' nperiod = 90, 155 | #' reward.mean.family = "Beta", 156 | #' reward.family = "Bernoulli", 157 | #' narms.family = "Binomial", 158 | #' npulls.family = "Log-Normal", 159 | #' stationary = FALSE, 160 | #' nonstationary.type = "Geometric Random Walk", 161 | #' data.par = list(reward.mean = list(shape1 = 2, shape2 = 5), 162 | #' npulls.family = list(meanlog = 3, sdlog = 1), 163 | #' narms.family = list(size = 10, prob = 0.5), 164 | #' nonstationary.family = list(sdlog = 0.05)), 165 | #' regret.plot = TRUE) 166 | #' res$regret.plot.object 167 | 168 | 169 | 170 | SimulateMultipleMethods <- function(method = "Thompson-Sampling", 171 | method.par = list(ndraws.TS = 1000), 172 | iter, 173 | nburnin, 174 | nperiod, 175 | reward.mean.family, 176 | reward.family, 177 | narms.family, 178 | npulls.family, 179 | stationary = TRUE, 180 | nonstationary.type = NULL, 181 | data.par, 182 | regret.plot = FALSE){ 183 | if (! all(method %in% c("Epsilon-Greedy", "Epsilon-Decreasing", 184 | "Thompson-Sampling","EXP3", "UCB", "Bayes-Poisson-TS", 185 | "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling", 186 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS", 187 | "HyperTS"))){ 188 | stop("Please specify correct method names!") 189 | } 190 | if (! reward.family %in% c("Bernoulli", "Poisson", "Gaussian")){ 191 | stop("Please specify correct reward family!") 192 | } 193 | if (! reward.mean.family %in% c("Uniform", "Beta", "Gaussian")){ 194 | stop("Please specify correct mean reward family!") 195 | } 196 | if (! narms.family %in% c("Binomial", "Poisson")){ 197 | stop("Please specify correct distribution family for the number of arms!") 198 | } 199 | if (! 
npulls.family %in% c("Log-Normal", "Poisson")){ 200 | stop("Please specify correct distribution family for the number of pulls!") 201 | } 202 | 203 | 204 | nmethod <- length(method) 205 | regret.matrix <- array(0, c(nperiod, iter, nmethod)) 206 | for (i in 1:iter){ 207 | if (narms.family == "Poisson"){ 208 | lambda <- data.par$narms.family$lambda 209 | while(TRUE){ 210 | narms <- rpois(1, lambda) 211 | if (narms > 1){ 212 | break 213 | } 214 | } 215 | } 216 | if (narms.family == "Binomial"){ 217 | size <- data.par$narms.family$size 218 | prob <- data.par$narms.family$prob 219 | while(TRUE){ 220 | narms <- rbinom(1, size, prob) 221 | if (narms > 1){ 222 | break 223 | } 224 | } 225 | } 226 | 227 | 228 | if (reward.mean.family == "Uniform"){ 229 | mean.reward <- runif(narms, min = data.par$reward.mean$min, 230 | max = data.par$reward.mean$max) 231 | } 232 | if (reward.mean.family == "Beta"){ 233 | mean.reward <- rbeta(narms, shape1 = data.par$reward.mean$shape1, 234 | shape2 = data.par$reward.mean$shape2) 235 | } 236 | if (reward.mean.family == "Gaussian"){ 237 | if (reward.family == "Bernoulli" | reward.family == "Poisson"){ 238 | stop("Please not use Gaussian if reward family is Bernoulli or Poisson!") 239 | } 240 | mean.reward <- rnorm(narms, mean = data.par$reward.mean$mean, 241 | sd = data.par$reward.mean$sd) 242 | } 243 | 244 | 245 | if (stationary == FALSE){ 246 | mean.reward.matrix <- matrix(0, nrow = nburnin + nperiod, ncol = narms) 247 | mean.reward.matrix[1, ] <- mean.reward 248 | if (nonstationary.type == "Geometric Random Walk"){ 249 | for (j in 2:(nburnin + nperiod)){ 250 | if (reward.family == "Bernoulli"){ 251 | mean.reward.matrix[j, ] <- 252 | sapply(mean.reward.matrix[j - 1, ] * 253 | rlnorm(narms, meanlog = 0, 254 | sdlog = data.par$nonstationary.family$sdlog), 255 | function(x) min(1, x)) 256 | }else{ 257 | mean.reward.matrix[j, ] <- 258 | mean.reward.matrix[j - 1, ] * 259 | rlnorm(narms, meanlog = 0, 260 | sdlog = 
data.par$nonstationary.family$sdlog) 261 | } 262 | } 263 | mean.reward <- mean.reward.matrix 264 | } 265 | if (nonstationary.type == "Random Walk"){ 266 | if (reward.family == "Bernoulli" | reward.family == "Poisson"){ 267 | stop("Please not use Random Walk to generate mean reward if reward family is Bernoulli or Poisson!") 268 | } 269 | for (j in 2:(nburnin + nperiod)){ 270 | mean.reward.matrix[j, ] <- mean.reward.matrix[j - 1, ] + 271 | rnorm(narms, mean = 0, sd = data.par$nonstationary.family$sd) 272 | } 273 | mean.reward <- mean.reward.matrix 274 | } 275 | } 276 | 277 | 278 | if (npulls.family == "Log-Normal"){ 279 | npulls.per.period <- ceiling(rlnorm(nburnin + nperiod, 280 | meanlog = data.par$npulls.family$meanlog, 281 | sdlog = data.par$npulls.family$sdlog)) 282 | } 283 | if (npulls.family == "Poisson"){ 284 | npulls.per.period <- rep(0, nburnin + nperiod) 285 | for (num in 1:(nburnin + nperiod)){ 286 | while(TRUE){ 287 | npulls.per.period[num] <- rpois(1, lambda = data.par$npulls.family$lambda) 288 | if (npulls.per.period[num] > 0){ 289 | break 290 | } 291 | } 292 | } 293 | } 294 | 295 | for (k in 1:length(method)){ 296 | regret <- SimulateMultiplePeriods(method = method[k], 297 | nburnin = nburnin, 298 | nperiod = nperiod, 299 | mean.reward = mean.reward, 300 | reward.family = reward.family, 301 | sd.reward = data.par$reward.family$sd, 302 | npulls.per.period = npulls.per.period, 303 | method.par = method.par)$regret 304 | regret.matrix[, i, k] <- regret 305 | } 306 | } 307 | 308 | if (regret.plot == TRUE){ 309 | relativeRegret <- c(apply(regret.matrix, c(1,3), mean)) 310 | methodName <- rep(method, each = nperiod) 311 | daySeq <- rep(1:nperiod, nmethod) 312 | graphData <- data.frame(daySeq, relativeRegret, methodName) 313 | regret.plot.object <- ggplot(graphData, 314 | aes(x = daySeq, 315 | y = relativeRegret, 316 | colour = factor(methodName))) + 317 | geom_line() + ggtitle("Regret Plot") + 318 | theme(legend.text = element_text(size = 12, face = 
"bold"), 319 | axis.text.y = element_text(size = 12), 320 | legend.title = element_blank()) + 321 | labs(y = "Relative Regret", x = "day") 322 | return(list(regret.matrix = regret.matrix, 323 | regret.plot.object = regret.plot.object)) 324 | }else{ 325 | return(list(regret.matrix = regret.matrix)) 326 | } 327 | } 328 | 329 | 330 | -------------------------------------------------------------------------------- /R/SimulateMultiplePeriods.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Simulate strategies for Multi-Armed Bandit in multiple periods 15 | #' 16 | #' This function is aimed to simulate data to run 17 | #' strategies of Multi-Armed Bandit in a sequence of periods. Weight plot 18 | #' and regret plot are provided if needed. In each period there could be 19 | #' multiple pulls and each method can only be applied once. The default setting 20 | #' is that in each period there is only 1 pull, corresponding to continuous 21 | #' updating. 22 | #' 23 | #' Various methods have been implemented. "Epsilon-Greedy" and 24 | #' "Epsilon-Decreasing" allocates \eqn{1 - epsilon} traffic to the arm which has 25 | #' the largest average reward and equally distribute the traffic 26 | #' to other arms. For "Epsilon-Greedy" epsilon in \code{method.par} serves as 27 | #' constant exploration rate . 
For "Epsilon-Decreasing" epsilon in 28 | #' \code{method.par} serves as exploration rate at period 1, 29 | #' while in period \eqn{t} exploration rate is \eqn{epsilon / t}. 30 | #' See \url{https://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions} 31 | #' for more details about these strategies. 32 | #' 33 | #' "Thompson-Sampling" refers to Beta-Binomial Thompson Sampling using 34 | #' Beta(1, 1) as a prior. "Bayes-Poisson-TS" refers to Poisson-Gamma Thompson 35 | #' Sampling using a Bayesian Generalized Linear 36 | #' Mixed Effects Model to compute weights. "Bayes-Poisson-TS", 37 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS" depends on the package 38 | #' "emre" to compute posterior distribution. For algorithm 39 | #' details, see the paper \url{https://arxiv.org/abs/1602.00047}. 40 | #' 41 | #' UCB (Upper Confidence Bound) is a classical method for Multi-Armed Bandit. 42 | #' For algorithm details, see the paper 43 | #' \url{http://personal.unileoben.ac.at/rortner/Pubs/UCBRev.pdf}. 44 | #' EXP3 is a method which needs to specify exploration rate \code{gamma} and 45 | #' exploitation rate \code{eta}. For algorithm details, see the paper 46 | #' \url{https://cseweb.ucsd.edu/~yfreund/papers/bandits.pdf}. 47 | #' 48 | #' Ensemble methods are also implemented. "Greedy-Thompson-Sampling" and 49 | #' "Greedy-Bayes-Poisson-TS" allocate \eqn{1 - epsilon} traffic to the arm 50 | #' corresponding to the largest 51 | #' Thompson sampling weight and allocate \eqn{epsilon} traffic 52 | #' corresponding to Thompson sampling weights. 53 | #' Instead of using average reward for each period to update weights in "EXP3", 54 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS" use Thompson sampling 55 | #' weights in the updating formula in "EXP3". 56 | #' "HyperTS" is an ensemble by applying Thompson Sampling to selecting the best 57 | #' method in each period based on previous performance. 
For algorithm details, 58 | #' see the paper 59 | #' \url{http://yxjiang.github.io/paper/RecSys2014-ensemble-bandit.pdf}. 60 | #' 61 | #' To measure the performance, regret is computed by summing over the products 62 | #' of the number of pulls on one arm at one period and 63 | #' the difference of the mean reward of that arm compared with the largest one. 64 | #' Relative regret is 65 | #' computed by dividing the regret of a certain method by the regret of the 66 | #' benchmark method that allocates equal weights to each arm 67 | #' throughout all the periods. 68 | #' 69 | #' 70 | #' @param method A character string choosing from "Epsilon-Greedy", 71 | #' "Epsilon-Decreasing", "Thompson-Sampling", 72 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 73 | #' "EXP3-Thompson-Sampling", 74 | #' "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 75 | #' For details of these methods, see below. Default is "Thompson-Sampling". 76 | #' @param method.par A list of parameters needed for different methods: 77 | #' 78 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 79 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 80 | #' "Greedy-Bayes-Poisson-TS". 81 | #' 82 | #' \code{ndraws.TS}: A positive integer specifying the number of random draws 83 | #' from the posterior; 84 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 85 | #' "EXP3-Thompson-Sampling". Default is 1000. 
91 | #' 92 | #' \code{BP}: A list consisting of three positive integers \code{iter.BP}, 93 | #' \code{ndraws.BP} and \code{interval.BP}; 94 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 95 | #' "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 96 | #' to compute posterior; 97 | #' \code{ndraws.BP} specifies the number of posterior samples 98 | #' drawn from posterior distribution; \code{interval.BP} is specified to draw 99 | #' each posterior sample from a sample sequence of length \code{interval.BP}. 100 | #' 101 | #' \code{HyperTS}: A list consisting of a vector \code{method.list}, 102 | #' needed for "HyperTS". \code{method.list} is a vector of character strings 103 | #' choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 104 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 105 | #' "EXP3-Thompson-Sampling", "Greedy-Bayes-Poisson-TS" and 106 | #' "EXP3-Bayes-Poisson-TS". "HyperTS" will construct an ensemble consisting of all 107 | #' the methods in \code{method.list}. 108 | #' @param nburnin A positive integer specifying the number of periods to 109 | #' allocate each arm equal traffic before applying any strategy. 110 | #' @param nperiod A positive integer specifying the number of periods 111 | #' to apply the strategy. 112 | #' @param npulls.per.period A positive integer or a vector of positive 113 | #' integers. Default value is 1. If \code{npulls.per.period} is a positive 114 | #' integer, the number of pulls is \code{npulls.per.period} for each period. 115 | #' If \code{npulls.per.period} is a vector, each element represents 116 | #' the number of pulls for one period; the length of \code{npulls.per.period} 117 | #' should be equal to \code{nburnin} + \code{nperiod}. 118 | #' @param reward.family A character string specifying the distribution family 119 | #' of reward. Available distributions include 120 | #' "Bernoulli", "Poisson" and "Gaussian". 
If "Gaussian" is chosen to be the 121 | #' reward distribution, 122 | #' a vector of standard deviation should be provided in \code{sd.reward}. 123 | #' @param sd.reward A vector of non-negative numbers specifying 124 | #' standard deviation of each arm's reward distribution if "Gaussian" is chosen 125 | #' to be the reward distribution. Default to be NULL. 126 | #' See \code{reward.family}. 127 | #' @param mean.reward A vector or a matrix of real numbers specifying the mean 128 | #' reward of each arm. If \code{mean.reward} is a vector, each element is the 129 | #' mean reward for each arm and the mean reward of each arm is unchanged 130 | #' throughout all periods (corresponding to the stationary Multi-Armed Bandit). 131 | #' If \code{mean.reward} is a matrix, it should 132 | #' have (\code{nburnin} + \code{nperiod}) rows. The mean reward of each arm 133 | #' could change. Each row represents a mean reward vector for each period 134 | #' (corresponding to nonstationary and adversarial Multi-Armed Bandit). 135 | #' @param weight.plot A logic value with FALSE as default. If TRUE, weight plot 136 | #' object for each arm is returned. 137 | #' @param regret.plot A logic value with FALSE as default. If TRUE, relative 138 | #' regret plot object is returned. 139 | #' 140 | #' @return a list consisting of: 141 | #' \item{weight}{A weight matrix whose each element is the allocated weight 142 | #' for each arm and period. Each row represents one arm and each column 143 | #' represents one period.} 144 | #' \item{regret}{A relative regret vector whose each element is relative regret 145 | #' for each period. 
For definition of relative regret, see above.}
#' \item{weight.plot.object}{If weight.plot = TRUE, a ggplot object is returned.}
#' \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.}
#' @export
#' @examples
#' ### Simulate Thompson-Sampling
#' set.seed(100)
#' res <- SimulateMultiplePeriods(method = "Thompson-Sampling",
#'                                method.par = list(ndraws.TS = 1000),
#'                                nburnin = 30,
#'                                nperiod = 180,
#'                                npulls.per.period = 5,
#'                                reward.family = "Bernoulli",
#'                                mean.reward = runif(3, 0, 0.1),
#'                                weight.plot = TRUE)
#' res$weight.plot.object
#' ### Simulate EXP3-Thompson-Sampling
#' set.seed(100)
#' res <- SimulateMultiplePeriods(
#'     method = "EXP3-Thompson-Sampling",
#'     method.par = list(ndraws.TS = 1000,
#'                       EXP3 = list(gamma = 0, eta = 0.1)),
#'     nburnin = 30,
#'     nperiod = 180,
#'     npulls.per.period = 5,
#'     reward.family = "Bernoulli",
#'     mean.reward = runif(3, 0, 0.1),
#'     weight.plot = TRUE)
#' res$weight.plot.object
#' ### Simulate ensemble method HyperTS given "Thompson-Sampling", "Epsilon-Greedy" and "Epsilon-Decreasing"
#' set.seed(100)
#' res <- SimulateMultiplePeriods(
#'     method = "HyperTS",
#'     method.par = list(
#'         ndraws.TS = 1000,
#'         epsilon = 0.1,
#'         HyperTS = list(method.list = c("Thompson-Sampling",
#'                                        "Epsilon-Greedy",
#'                                        "Epsilon-Decreasing"))),
#'     nburnin = 30,
#'     nperiod = 180,
#'     npulls.per.period = 5,
#'     reward.family = "Poisson",
#'     mean.reward = runif(3, 0, 0.1),
#'     weight.plot = TRUE)
#' res$weight.plot.object

SimulateMultiplePeriods <- function(method = "Thompson-Sampling",
                                    method.par = list(ndraws.TS = 1000),
                                    nburnin,
                                    nperiod,
                                    reward.family,
                                    mean.reward,
                                    sd.reward = NULL,
                                    npulls.per.period = 1,
                                    weight.plot = FALSE,
                                    regret.plot = FALSE){
  # ---- Input validation -----------------------------------------------------
  supported.methods <- c("Epsilon-Greedy", "Epsilon-Decreasing",
                         "Thompson-Sampling", "EXP3", "UCB", "Bayes-Poisson-TS",
                         "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling",
                         "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS",
                         "HyperTS")
  if (! method %in% supported.methods){
    stop("Please specify correct method names!")
  }

  # A vector mean.reward is the stationary case (one mean per arm, fixed over
  # time); a matrix has one row of per-arm means per period (nonstationary).
  if (is.vector(mean.reward)){
    rewardVec <- mean.reward
    bestReward <- max(rewardVec)               # best arm's mean (scalar)
    n <- length(rewardVec)                     # number of arms
  }else{
    bestReward <- apply(mean.reward, 1, max)   # best mean per period
    n <- dim(mean.reward)[2]
  }

  # Scalar conditions: use short-circuit `||` (was vectorized `|`).
  if (length(npulls.per.period) == 1){
    npullsVec <- npulls.per.period
  }else if (length(npulls.per.period) != nburnin + nperiod ||
            min(npulls.per.period) <= 0){
    stop("Please specify correct number of pulls per period!")
  }

  if (reward.family == "Gaussian" &&
      (length(sd.reward) != n || anyNA(sd.reward))){
    stop("Please specify correct standard deviation for Gaussian reward family!")
  }

  # ---- Burn-in: spread pulls uniformly over the arms ------------------------
  if (is.vector(mean.reward)){
    # Stationary means: all burn-in pulls can be drawn in one multinomial.
    total.burnin.pulls <- if (length(npulls.per.period) == 1){
      nburnin * npullsVec
    }else{
      sum(npulls.per.period[1:nburnin])
    }
    burninTrial <- rmultinom(1, total.burnin.pulls, rep(1 / n, n))
    burninReward <- apply(cbind(burninTrial, rewardVec, sd.reward), 1,
                          GetReward, reward.family)
  }else{
    # Nonstationary means: draw period by period because the means change.
    burninTrial <- rep(0, n)
    burninReward <- rep(0, n)
    for (period in seq_len(nburnin)){
      period.pulls <- if (length(npulls.per.period) == 1){
        npullsVec
      }else{
        npulls.per.period[period]
      }
      tempTrial <- rmultinom(1, period.pulls, rep(1 / n, n))
      tempReward <- apply(cbind(tempTrial, mean.reward[period, ], sd.reward),
                          1, GetReward, reward.family)
      burninTrial <- burninTrial + tempTrial
      burninReward <- burninReward + tempReward
    }
  }
  burninEvent <- data.frame(trial = burninTrial, reward = burninReward)

  # ---- Benchmark: regret of the equal-allocation strategy -------------------
  equalDailyRegret <- rep(0, nperiod)
  for (period in seq_len(nperiod)){
    if (length(npulls.per.period) == 1){
      dailyTrial <- rep(npullsVec / n, n)
    }else{
      dailyTrial <- rep(npulls.per.period[nburnin + period] / n, n)
    }
    if (is.vector(mean.reward)){
      equalDailyRegret[period] <- sum(dailyTrial * (bestReward - rewardVec))
    }else{
      equalDailyRegret[period] <-
        sum(dailyTrial * (bestReward[nburnin + period] -
                          mean.reward[nburnin + period, ]))
    }
  }

  # ---- Main simulation loop -------------------------------------------------
  # Preallocate the weight matrix (one column per period); previously grown
  # with cbind() inside the loop, which copies the matrix every iteration.
  allWeight <- matrix(NA_real_, nrow = n, ncol = nperiod)
  all.event <- burninEvent
  dailyRegret <- rep(0, nperiod)
  # Seed the EXP3 bookkeeping with a Thompson-Sampling weight on the burn-in.
  weight <- as.vector(CalculateWeight(method = "Thompson-Sampling",
                                      sd.reward = sd.reward,
                                      reward.family = reward.family,
                                      all.event = all.event,
                                      method.par = list(ndraws.TS = 1000)))
  EXP3Info <- list(prevWeight = weight,
                   EXP3Trial = burninTrial,
                   EXP3Reward = burninReward)

  if (method == "HyperTS"){
    nmethod <- length(method.par$HyperTS$method.list)
    total.reward <- rep(0, nmethod)
    if (reward.family == "Bernoulli"){
      total.trial <- rep(0, nmethod)
    }
    # Gaussian/Poisson posteriors divide by total.trial, so start at 1 to
    # avoid division by zero on the first period.
    if (reward.family == "Gaussian" || reward.family == "Poisson"){
      total.trial <- rep(1, nmethod)
    }
  }

  for (period in seq_len(nperiod)){
    if (method == "HyperTS"){
      # Thompson sampling over the base methods: draw from each method's
      # posterior and pick the method that wins the most posterior draws.
      ndraws <- 1000
      ans <- matrix(nrow = ndraws, ncol = nmethod)
      if (reward.family == "Bernoulli"){
        for (i in seq_len(nmethod)){
          ans[, i] <- rbeta(ndraws, total.reward[i] + 1,
                            total.trial[i] - total.reward[i] + 1)
        }
      }
      if (reward.family == "Gaussian"){
        for (i in seq_len(nmethod)){
          # NOTE(review): sd.reward is a per-arm vector but is indexed by the
          # method index i here -- confirm this is the intended behavior.
          ans[, i] <- rnorm(ndraws, mean = total.reward[i] / total.trial[i],
                            sd = sd.reward[i] / sqrt(total.trial[i]))
        }
      }
      if (reward.family == "Poisson"){
        for (i in seq_len(nmethod)){
          ans[, i] <- rgamma(ndraws, shape = total.reward[i] + 1,
                             scale = 1 / total.trial[i])
        }
      }
      method.index <- which.max(as.vector(table(factor(max.col(ans),
                                                       levels = 1:nmethod))))
      method.chosen <- method.par$HyperTS$method.list[method.index]
      weight <- CalculateWeight(method.chosen,
                                all.event = all.event,
                                sd.reward = sd.reward,
                                reward.family = reward.family,
                                method.par = method.par,
                                period = period,
                                EXP3Info = EXP3Info)
    }else{
      weight <- CalculateWeight(method,
                                all.event = all.event,
                                sd.reward = sd.reward,
                                reward.family = reward.family,
                                method.par = method.par,
                                period = period,
                                EXP3Info = EXP3Info)
    }

    allWeight[, period] <- as.vector(weight)
    # Allocate this period's pulls to the arms according to `weight`.
    period.pulls <- if (length(npulls.per.period) == 1){
      npullsVec
    }else{
      npulls.per.period[nburnin + period]
    }
    dailyTrial <- rmultinom(1, period.pulls, weight)
    if (is.vector(mean.reward)){
      dailyReward <- apply(cbind(dailyTrial, rewardVec, sd.reward),
                           1, GetReward, reward.family)
      dailyRegret[period] <- sum(dailyTrial * (bestReward - rewardVec))
    }else{
      dailyReward <-
        apply(cbind(dailyTrial, mean.reward[nburnin + period, ], sd.reward),
              1, GetReward, reward.family)
      dailyRegret[period] <-
        sum(dailyTrial *
            (bestReward[nburnin + period] - mean.reward[nburnin + period, ]))
    }
    all.event$trial <- all.event$trial + dailyTrial
    all.event$reward <- all.event$reward + dailyReward
    EXP3Info <- list(prevWeight = weight,   # was `=`; use `<-` for assignment
                     EXP3Trial = dailyTrial,
                     EXP3Reward = dailyReward)
    if (method == "HyperTS"){
      # Credit this period's outcome to the base method that produced it.
      total.reward[method.index] <-
        total.reward[method.index] + sum(dailyReward)
      total.trial[method.index] <- total.trial[method.index] + sum(dailyTrial)
    }
  }
  # Relative regret is 0 by convention when the benchmark regret is 0.
  relativeRegret <- ifelse(equalDailyRegret != 0,
                           dailyRegret / equalDailyRegret, 0)

  # ---- Optional plots and return -------------------------------------------
  # Build the result incrementally instead of a four-branch return chain;
  # element order matches the original for every plot-flag combination.
  result <- list(weight = allWeight, regret = relativeRegret)
  if (isTRUE(weight.plot)){
    weightVector <- c(t(allWeight))
    if (is.vector(mean.reward)){
      names <- rep(sapply(rewardVec, function(x) paste("Mean Reward =", x)),
                   each = nperiod)
    }else{
      names <- rep(sapply(1:n, function(x) paste("Arm", x)), each = nperiod)
    }
    periodSeq <- rep(1:nperiod, n)
    graphData <- data.frame(names, periodSeq, weightVector)
    result$weight.plot.object <- ggplot(graphData,
                                        aes(x = periodSeq,
                                            y = weightVector,
                                            colour = names)) +
      geom_line() + ggtitle("Weight Plot") +
      theme(legend.text = element_text(size = 12, face = "bold"),
            axis.text.y = element_text(size = 12),
            legend.title = element_blank()) +
      labs(y = "weight", x = "period")
  }
  if (isTRUE(regret.plot)){
    periodSeq <- 1:nperiod
    graphData <- data.frame(periodSeq, relativeRegret)
    result$regret.plot.object <- ggplot(graphData,
                                        aes(x = periodSeq,
                                            y = relativeRegret)) +
      geom_line() + ggtitle("Regret Plot") +
      theme(legend.text = element_text(size = 12, face = "bold"),
            axis.text.y = element_text(size = 12),
            legend.title = element_blank()) +
      labs(y = "Relative Regret", x = "period")
  }
  return(result)
}

# Draw the total reward collected from one arm, given the number of pulls and
# the arm's mean reward.
#
# x: a numeric vector c(npulls, mean, sd) -- the sd entry (x[3]) is only read
#    for the Gaussian family.
# reward.family: "Bernoulli", "Gaussian" or "Poisson".
# sd.reward: unused; kept for backward compatibility with existing callers
#    (the sd is taken from x[3] instead).
# Returns a single random draw; NULL (invisibly) for an unknown family, as in
# the original if-chain.
GetReward <- function(x, reward.family, sd.reward){
  switch(reward.family,
         "Bernoulli" = rbinom(n = 1, size = x[1], prob = x[2]),
         # Sum of x[1] i.i.d. Gaussians: mean scales by x[1], sd by sqrt(x[1]).
         "Gaussian" = rnorm(n = 1, mean = x[1] * x[2], sd = x[3] * sqrt(x[1])),
         "Poisson" = rpois(n = 1, lambda = x[1] * x[2]))
}
-------------------------------------------------------------------------------- /R/SimulateMultiplePeriods.R~: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Simulate strategies for Multi-Armed Bandit in multiple periods 15 | #' 16 | #' This function is aimed to simulate data to run 17 | #' strategies of Multi-Armed Bandit in a sequence of periods. Weight plot 18 | #' and regret plot are provided if needed. In each period there could be 19 | #' multiple pulls and each method can only be applied once. The default setting 20 | #' is that in each period there is only 1 pull, corresponding to continuous 21 | #' updating.
22 | #' 23 | #' Various methods have been implemented. "Epsilon-Greedy" and 24 | #' "Epsilon-Decreasing" allocates \eqn{1 - epsilon} traffic to the arm which has 25 | #' the largest average reward and equally distribute the traffic 26 | #' to other arms. For "Epsilon-Greedy" epsilon in \code{method.par} serves as 27 | #' constant exploration rate . For "Epsilon-Decreasing" epsilon in 28 | #' \code{method.par} serves as exploration rate at period 1, 29 | #' while in period \eqn{t} exploration rate is \eqn{epsilon / t}. 30 | #' See \url{https://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions} 31 | #' for more details about these strategies. 32 | #' 33 | #' "Thompson-Sampling" refers to Beta-Binomial Thompson Sampling using 34 | #' Beta(1, 1) as a prior. "Bayes-Poisson-TS" refers to Poisson-Gamma Thompson 35 | #' Sampling using a Bayesian Generalized Linear 36 | #' Mixed Effects Model to compute weights. "Bayes-Poisson-TS", 37 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS" depends on the package 38 | #' "emre" to compute posterior distribution. For algorithm 39 | #' details, see the paper \url{https://arxiv.org/abs/1602.00047}. 40 | #' 41 | #' UCB (Upper Confidence Bound) is a classical method for Multi-Armed Bandit. 42 | #' For algorithm details, see the paper 43 | #' \url{http://personal.unileoben.ac.at/rortner/Pubs/UCBRev.pdf}. 44 | #' EXP3 is a method which needs to specify exploration rate \code{gamma} and 45 | #' exploitation rate \code{eta}. For algorithm details, see the paper 46 | #' \url{https://cseweb.ucsd.edu/~yfreund/papers/bandits.pdf}. 47 | #' 48 | #' Ensemble methods are also implemented. "Greedy-Thompson-Sampling" and 49 | #' "Greedy-Bayes-Poisson-TS" allocate \eqn{1 - epsilon} traffic to the arm 50 | #' corresponding to the largest 51 | #' Thompson sampling weight and allocate \eqn{epsilon} traffic 52 | #' corresponding to Thompson sampling weights. 
53 | #' Instead of using average reward for each period to update weights in "EXP3", 54 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS" use Thompson sampling 55 | #' weights in the updating formula in "EXP3". 56 | #' "HyperTS" is an ensemble by applying Thompson Sampling to selecting the best 57 | #' method in each period based on previous performance. For algorithm details, 58 | #' see the paper 59 | #' \url{http://yxjiang.github.io/paper/RecSys2014-ensemble-bandit.pdf}. 60 | #' 61 | #' To measure the performance. Regret is computed by summing over the products 62 | #' of the number of pulls on one arm at one period and 63 | #' the difference of the mean reward of that arm compared with the largest one. 64 | #' Relative regret is 65 | #' computed by dividing the regret of a certain method over the regret of the 66 | #' benchmark method that allocates equal weights to each arm 67 | #' throughout all the periods. 68 | #' 69 | #' 70 | #' @param method A character string choosing from "Epsilon-Greedy", 71 | #' "Epsilon-Decreasing", "Thompson-Sampling", 72 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 73 | #' "EXP3-Thompson-Sampling", 74 | #' "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 75 | #' For details of these methods, see below. Default is "Thompson-Sampling". 76 | #' @param method.par A list of parameters needed for different methods: 77 | #' 78 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 79 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and "Greedy-Bayes-Poisson-TS". 80 | #' 81 | #' \code{ndraws.TS}: A positive integer specifying the number of random draws 82 | #' from the posterior; 83 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 84 | #' "EXP3-Thompson-Sampling". Default is 1000. 
85 | #' 86 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and \code{gamma}; 87 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 88 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 89 | #' 90 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 91 | #' \code{ndraws.BP} and \code{interval.BP}; 92 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 93 | #' "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 94 | #' to compute posterior; 95 | #' \code{ndraws.BP} specifies the number of posterior samples 96 | #' drawn from posterior distribution; \code{interval.BP} is specified to draw 97 | #' each posterior sample from a sample sequence of length \code{interval.BP}. 98 | #' 99 | #' \code{HyperTS}: A list consisting of a vector \code{method.list}, 100 | #' needed for "HyperTS". \code{method.list} is a vector of character strings 101 | #' choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 102 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 103 | #' "EXP3-Thompson-Sampling", "Greedy-Bayes-Poisson-TS" and 104 | #' "EXP3-Bayes-Poisson-TS". "HyperTS" will construct an ensemble consisting all 105 | #' the methods in \code{method.list}. 106 | #' @param nburnin A positive integer specifying the number of periods to 107 | #' allocate each arm equal traffic before applying any strategy. 108 | #' @param nperiod A positive integer specifying the number of periods 109 | #' to apply the strategy. 110 | #' @param npulls.per.period A positive integer or a vector of positive integers. 111 | #' Default value is 1. If \code{npulls.per.period} is a positive integer, 112 | #' the number of pulls is \code{npulls.per.period} for each period. 
113 | #' If \code{npulls.per.period} is a vector, each element represents 114 | #' the number of pulls for one period; the length of \code{npulls.per.period} 115 | #' should be equal to \code{nburnin} + \code{nperiod}. 116 | #' @param reward.family A character string specifying the distribution family 117 | #' of reward. Available distribution includes 118 | #' "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 119 | #' reward distribution, 120 | #' a vector of standard deviation should be provided in \code{sd.reward}. 121 | #' @param sd.reward A vector of non-negative numbers specifying 122 | #' standard deviation of each arm's reward distribution if "Gaussian" is chosen 123 | #' to be the reward distribution. Default to be NULL. 124 | #' See \code{reward.family}. 125 | #' @param mean.reward A vector or a matrix of real numbers specifying the mean 126 | #' reward of each arm. If \code{mean.reward} is a vector, each element is the 127 | #' mean reward for each arm and the mean reward of each arm is unchanged 128 | #' throughout all periods (corresponding to the stationary Multi-Armed Bandit). 129 | #' If \code{mean.reward} is a matrix, it should 130 | #' have (\code{nburnin} + \code{nperiod}) rows. The mean reward of each arm 131 | #' could change. Each row represents a mean reward vector for each period 132 | #' (corresponding to nonstationary and adversarial Multi-Armed Bandit). 133 | #' @param weight.plot A logic value with FALSE as default. If TRUE, weight plot 134 | #' object for each arm is returned. 135 | #' @param regret.plot A logic value with FALSE as default. If TRUE, relative 136 | #' regret plot object is returned. 137 | #' 138 | #' @return a list consisting of: 139 | #' \item{weight}{A weight matrix whose each element is the allocated weight 140 | #' for each arm and period. 
Each row represents one arm and each column 141 | #' represents one period.} 142 | #' \item{regret}{A relative regret vector whose each element is relative regret 143 | #' for each period. For definition of relative regret, see above.} 144 | #' \item{weight.plot.object}{If weight.plot = TRUE, a ggplot object is returned.} 145 | #' \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 146 | #' @export 147 | #' @examples 148 | #' ### Simulate Thompson-Sampling 149 | #' set.seed(100) 150 | #' res <- SimulateMultiplePeriods(method = "Thompson-Sampling", 151 | #' method.par = list(ndraws.TS = 1000), 152 | #' nburnin = 30, 153 | #' nperiod = 180, 154 | #' npulls.per.period = 5, 155 | #' reward.family = "Bernoulli", 156 | #' mean.reward = runif(3, 0, 0.1), 157 | #' weight.plot = TRUE) 158 | #' res$weight.plot.object 159 | #' ### Simulate EXP3-Thompson-Sampling 160 | #' set.seed(100) 161 | #' res <- SimulateMultiplePeriods(method = "EXP3-Thompson-Sampling", 162 | #' method.par = list(ndraws.TS = 1000, 163 | #' EXP3 = list(gamma = 0, eta = 0.1)), 164 | #' nburnin = 30, 165 | #' nperiod = 180, 166 | #' npulls.per.period = 5, 167 | #' reward.family = "Bernoulli", 168 | #' mean.reward = runif(3, 0, 0.1), 169 | #' weight.plot = TRUE) 170 | #' res$weight.plot.object 171 | #' ### Simulate ensemble method HyperTS given "Thompson-Sampling", "Epsilon-Greedy" and "Epsilon-Decreasing" 172 | #' set.seed(100) 173 | #' res <- SimulateMultiplePeriods(method = "HyperTS", 174 | #' method.par = list(ndraws.TS = 1000, 175 | #' epsilon = 0.1, 176 | #' HyperTS = list(method.list = c("Thompson-Sampling", 177 | #' "Epsilon-Greedy", 178 | #' "Epsilon-Decreasing"))), 179 | #' nburnin = 30, 180 | #' nperiod = 180, 181 | #' npulls.per.period = 5, 182 | #' reward.family = "Poisson", 183 | #' mean.reward = runif(3, 0, 0.1), 184 | #' weight.plot = TRUE) 185 | #' res$weight.plot.object 186 | 187 | 188 | 189 | 190 | SimulateMultiplePeriods <- function(method = "Thompson-Sampling", 
191 | method.par = list(ndraws.TS = 1000), 192 | nburnin, 193 | nperiod, 194 | reward.family, 195 | mean.reward, 196 | sd.reward = NULL, 197 | npulls.per.period = 1, 198 | weight.plot = FALSE, 199 | regret.plot = FALSE){ 200 | if (! method %in% c("Epsilon-Greedy", "Epsilon-Decreasing", 201 | "Thompson-Sampling","EXP3", "UCB", "Bayes-Poisson-TS", 202 | "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling", 203 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS", 204 | "HyperTS")){ 205 | stop("Please specify correct method names!") 206 | } 207 | 208 | if (is.vector(mean.reward)){ 209 | rewardVec <- mean.reward 210 | } 211 | 212 | 213 | if (length(npulls.per.period) == 1){ 214 | npullsVec <- npulls.per.period 215 | }else if(length(npulls.per.period) != nburnin + nperiod | 216 | min(npulls.per.period) <= 0){ 217 | stop("Please specify correct number of pulls per period!") 218 | } 219 | 220 | if (is.vector(mean.reward)){ 221 | bestReward <- max(rewardVec) 222 | n <- length(rewardVec) 223 | }else{ 224 | bestReward <- apply(mean.reward, 1, max) 225 | n <- dim(mean.reward)[2] 226 | } 227 | 228 | 229 | if (reward.family == "Gaussian" & 230 | (length(sd.reward) != n | anyNA(sd.reward))){ 231 | stop("Please specify correct standard deviation for Gaussian reward family!") 232 | } 233 | 234 | if (is.vector(mean.reward)){ 235 | if (length(npulls.per.period) == 1){ 236 | burninTrial <- rmultinom(1, nburnin * npullsVec, rep(1 / n, n)) 237 | }else{ 238 | burninTrial <- rmultinom(1, sum(npulls.per.period[1:nburnin]), rep(1 / n, n)) 239 | } 240 | burninReward <- apply(cbind(burninTrial, rewardVec, sd.reward), 1, 241 | GetReward, reward.family) 242 | }else{ 243 | burninTrial <- rep(0, n) 244 | burninReward <- rep(0, n) 245 | if (length(npulls.per.period) == 1){ 246 | for (period in 1:nburnin){ 247 | tempTrial <- rmultinom(1, npullsVec, rep(1 / n, n)) 248 | tempReward <- apply(cbind(tempTrial, mean.reward[period, ], sd.reward), 249 | 1, GetReward, reward.family) 250 | burninTrial <- 
burninTrial + tempTrial 251 | burninReward <- burninReward + tempReward 252 | } 253 | }else{ 254 | for (period in 1:nburnin){ 255 | tempTrial <- rmultinom(1, npulls.per.period[period], rep(1 / n, n)) 256 | tempReward <- apply(cbind(tempTrial, mean.reward[period, ], sd.reward), 257 | 1, GetReward, reward.family) 258 | burninTrial <- burninTrial + tempTrial 259 | burninReward <- burninReward + tempReward 260 | } 261 | } 262 | } 263 | burninEvent <- data.frame(trial = burninTrial, reward = burninReward) 264 | 265 | 266 | equalDailyRegret <- rep(0, nperiod) 267 | for (period in 1:nperiod){ 268 | if (length(npulls.per.period) == 1){ 269 | dailyTrial <- rep(npullsVec / n, n) 270 | }else{ 271 | dailyTrial <- rep(npulls.per.period[nburnin + period] / n, n) 272 | } 273 | if (is.vector(mean.reward)){ 274 | equalDailyRegret[period] <- sum(dailyTrial * (bestReward - rewardVec)) 275 | }else{ 276 | equalDailyRegret[period] <- 277 | sum(dailyTrial * (bestReward[nburnin + period] - mean.reward[nburnin + period, ])) 278 | } 279 | } 280 | 281 | 282 | allWeight <- cbind() 283 | all.event <- burninEvent 284 | dailyRegret <- rep(0, nperiod) 285 | 286 | weight <- as.vector(CalculateWeight(method = "Thompson-Sampling", 287 | sd.reward = sd.reward, 288 | reward.family = reward.family, 289 | all.event = all.event, 290 | method.par = list(ndraws.TS = 1000))) 291 | EXP3Info <- list(prevWeight = weight, 292 | EXP3Trial = burninTrial, 293 | EXP3Reward = burninReward) 294 | 295 | if (method == "HyperTS"){ 296 | nmethod <- length(method.par$HyperTS$method.list) 297 | total.reward <- rep(0, nmethod) 298 | if (reward.family == "Bernoulli"){ 299 | total.trial <- rep(0, nmethod) 300 | } 301 | if (reward.family == "Gaussian" | reward.family == "Poisson"){ 302 | total.trial <- rep(1, nmethod) 303 | } 304 | } 305 | 306 | 307 | 308 | 309 | for (period in 1:nperiod){ 310 | if (method == "HyperTS"){ 311 | ndraws <- 1000 312 | ans <- matrix(nrow = ndraws, ncol = nmethod) 313 | if (reward.family == 
"Bernoulli"){ 314 | for (i in 1:nmethod) ans[ ,i] <- rbeta(ndraws, total.reward[i] + 1, 315 | total.trial[i] - total.reward[i] + 1) 316 | } 317 | if (reward.family == "Gaussian"){ 318 | for (i in 1:nmethod) ans[ ,i] <- rnorm(ndraws, 319 | mean = total.reward[i] / total.trial[i], 320 | sd = sd.reward[i] / sqrt(total.trial[i])) 321 | } 322 | if (reward.family == "Poisson"){ 323 | for (i in 1:nmethod) ans[ ,i] <- 324 | rgamma(ndraws, shape = total.reward[i] + 1, 325 | scale = 1 / total.trial[i]) 326 | } 327 | method.index <- which.max(as.vector(table(factor(max.col(ans), 328 | levels = 1:nmethod)))) 329 | method.chosen <- method.par$HyperTS$method.list[method.index] 330 | weight <- CalculateWeight(method.chosen, 331 | all.event = all.event, 332 | sd.reward = sd.reward, 333 | reward.family = reward.family, 334 | method.par = method.par, 335 | period = period, 336 | EXP3Info = EXP3Info) 337 | } 338 | 339 | if (method != "HyperTS"){ 340 | weight <- CalculateWeight(method, 341 | all.event = all.event, 342 | sd.reward = sd.reward, 343 | reward.family = reward.family, 344 | method.par = method.par, 345 | period = period, 346 | EXP3Info = EXP3Info) 347 | } 348 | 349 | allWeight <- cbind(allWeight, weight) 350 | if (length(npulls.per.period) == 1){ 351 | dailyTrial <- rmultinom(1, npullsVec, weight) 352 | }else{ 353 | dailyTrial <- rmultinom(1, npulls.per.period[nburnin + period], weight) 354 | } 355 | if (is.vector(mean.reward)){ 356 | dailyReward <- apply(cbind(dailyTrial, rewardVec, sd.reward), 357 | 1, GetReward, reward.family) 358 | }else{ 359 | dailyReward <- 360 | apply(cbind(dailyTrial, mean.reward[nburnin + period, ], sd.reward), 361 | 1, GetReward, reward.family) 362 | } 363 | 364 | all.event$trial <- all.event$trial + dailyTrial 365 | all.event$reward <- all.event$reward + dailyReward 366 | if (is.vector(mean.reward)){ 367 | dailyRegret[period] <- sum(dailyTrial * (bestReward - rewardVec)) 368 | }else{ 369 | dailyRegret[period] <- 370 | sum(dailyTrial * 371 | 
(bestReward[nburnin + period] - mean.reward[nburnin + period, ])) 372 | } 373 | EXP3Info = list(prevWeight = weight, 374 | EXP3Trial = dailyTrial, 375 | EXP3Reward = dailyReward) 376 | 377 | if (method == "HyperTS"){ 378 | total.reward[method.index] <- total.reward[method.index] + sum(dailyReward) 379 | total.trial[method.index] <- total.trial[method.index] + sum(dailyTrial) 380 | } 381 | } 382 | relativeRegret <- ifelse(equalDailyRegret != 0, 383 | dailyRegret / equalDailyRegret, 0) 384 | 385 | if (weight.plot == TRUE){ 386 | weightVector <- c(t(allWeight)) 387 | if (is.vector(mean.reward)){ 388 | names <- rep(sapply(rewardVec, function(x) paste("Mean Reward =", x)), 389 | each = nperiod) 390 | }else{ 391 | names <- rep(sapply(1:n, function(x) paste("Arm", x)), each = nperiod) 392 | } 393 | 394 | periodSeq <- rep(1:nperiod, n) 395 | graphData <- data.frame(names, periodSeq, weightVector) 396 | weight.plot.object <- ggplot(graphData, 397 | aes(x = periodSeq, 398 | y = weightVector, 399 | colour = names)) + 400 | geom_line() + ggtitle("Weight Plot") + 401 | theme(legend.text = element_text(size = 12, face = "bold"), 402 | axis.text.y = element_text(size = 12), 403 | legend.title = element_blank()) + 404 | labs(y = "weight", x = "period") 405 | } 406 | if (regret.plot == TRUE){ 407 | periodSeq <- 1:nperiod 408 | graphData <- data.frame(periodSeq, relativeRegret) 409 | regret.plot.object <- ggplot(graphData, 410 | aes(x = periodSeq, 411 | y = relativeRegret)) + 412 | geom_line() + ggtitle("Regret Plot") + 413 | theme(legend.text = element_text(size = 12, face = "bold"), 414 | axis.text.y = element_text(size = 12), 415 | legend.title = element_blank()) + 416 | labs(y = "Relative Regret", x = "period") 417 | } 418 | 419 | if (weight.plot == TRUE & regret.plot == TRUE){ 420 | return(list(weight = allWeight, 421 | regret = relativeRegret, 422 | weight.plot.object = weight.plot.object, 423 | regret.plot.object = regret.plot.object)) 424 | } 425 | if (weight.plot == FALSE & 
regret.plot == TRUE){ 426 | return(list(weight = allWeight, 427 | regret = relativeRegret, 428 | regret.plot.object = regret.plot.object)) 429 | } 430 | if (weight.plot == TRUE & regret.plot == FALSE){ 431 | return(list(weight = allWeight, 432 | regret = relativeRegret, 433 | weight.plot.object = weight.plot.object)) 434 | } 435 | if (weight.plot == FALSE & regret.plot == FALSE){ 436 | return(list(weight = allWeight, regret = relativeRegret)) 437 | } 438 | } 439 | 440 | 441 | 442 | 443 | GetReward <- function(x, reward.family, sd.reward){ 444 | if (reward.family == "Bernoulli") { 445 | return(rbinom(n = 1, size = x[1], prob = x[2])) 446 | } 447 | if (reward.family == "Gaussian") { 448 | return(rnorm(n = 1, mean = x[1] * x[2], sd = x[3] * sqrt(x[1]))) 449 | } 450 | if (reward.family == "Poisson"){ 451 | return(rpois(n = 1, lambda = x[1] * x[2])) 452 | } 453 | } 454 | 455 | 456 | 457 | 458 | 459 | --------------------------------------------------------------------------------