├── .Rhistory ├── NAMESPACE ├── README~ ├── MAB.Rproj ├── DESCRIPTION ├── README.md ├── test ├── run_tests.R └── test_MAB.R ├── CONTRIBUTING.md ├── man ├── CalculateWeight.Rd ├── SimulateMultipleMethods.Rd └── SimulateMultiplePeriods.Rd ├── LICENSE └── R ├── CalculateWeight.R ├── CalculateWeight.R~ ├── SimulateMultipleMethods.R ├── SimulateMultipleMethods.R~ ├── SimulateMultiplePeriods.R └── SimulateMultiplePeriods.R~ /.Rhistory: -------------------------------------------------------------------------------- 1 | install_github("google/MAB") 2 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(CalculateWeight) 4 | export(SimulateMultipleMethods) 5 | export(SimulateMultiplePeriods) 6 | -------------------------------------------------------------------------------- /README~: -------------------------------------------------------------------------------- 1 | This is not an official Google product. 
2 | 3 | To install MAB package, download the package and run the following code in 4 | command line: 5 | R CMD INSTALL FILE.PATH 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /MAB.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | Encoding: UTF-8 9 | 10 | AutoAppendNewline: Yes 11 | StripTrailingWhitespace: Yes 12 | 13 | BuildType: Package 14 | PackageUseDevtools: Yes 15 | PackageInstallArgs: --no-multiarch --with-keep.source 16 | PackageRoxygenize: rd,collate,namespace 17 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: MAB 2 | Title: Multi-Armed Bandit Strategies Implementation and Simulation 3 | Version: 1.0.0 4 | Authors@R: c( 5 | person("Yunbo", "Ouyang", email = "youyang4@illinois.edu", role = c("aut")), 6 | person("Shuchao", "Bi", email = "shuchaobi@google.com", role = c("aut", "cre")), 7 | person("Zoey", "Chu", email = "chuwanghuan@google.com", role = c("aut")), 8 | person("Chunqiu", "Zeng", email = "chunqiuzeng@google.com", role = c("aut"))) 9 | Description: Implement various multi-armed bandit strategies and 10 | conduct comparison studies via simulation. 11 | Depends: 12 | R (>= 3.2.2), ggplot2, emre, assertthat 13 | License: Apache License 2.0 14 | Encoding: UTF-8 15 | LazyData: true 16 | RoxygenNote: 5.0.1 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MAB 2 | 3 | R package MAB is created to 4 | implement strategies for stationary and non-stationary multi-armed bandit problems. 
5 | Various widely-used strategies and their ensembles are included in this package. 6 | This package is designed to compare different strategies in multi-armed bandit problems 7 | and help users to choose suitable strategies with suitable tuning parameters 8 | in different scenarios. This is not an official Google product. 9 | 10 | 11 | Install the Package 12 | ------------------- 13 | 14 | MAB depends on R package [emre](https://github.com/google/emre). To install MAB package, download the package and run the following code in 15 | command line: 16 | 17 | R CMD INSTALL FILE.PATH 18 | 19 | Another way is to install devtools package first and then run the following code 20 | in R: 21 | 22 | library(devtools) 23 | 24 | install_github("google/MAB") 25 | 26 | 27 | -------------------------------------------------------------------------------- /test/run_tests.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | library(testthat) 15 | 16 | source("/google/src/cloud/youyang/rpackage/google3/experimental/users/youyang/MAB/R/CalculateWeight.R") 17 | 18 | test_results <- test_dir("/google/src/cloud/youyang/rpackage/google3/experimental/users/youyang/MAB/test", 19 | reporter="summary") 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution, 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | -------------------------------------------------------------------------------- /test/test_MAB.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | test_that("Test Epsilon-Greedy",{ 15 | expect_equal(CalculateWeight(method = "Epsilon-Greedy", 16 | method.par = list(epsilon = 0.01), 17 | all.event = data.frame(reward = 1:3, 18 | trial = rep(10, 3)), 19 | reward.family = "Bernoulli"), 20 | c(0.005, 0.005, 0.990))}) 21 | test_that("Test Epsilon-Decreasing",{ 22 | expect_equal(CalculateWeight(method = "Epsilon-Decreasing", 23 | method.par = list(epsilon = 0.01), 24 | all.event = data.frame(reward = 1:3, 25 | trial = rep(10, 3)), 26 | reward.family = "Bernoulli"), 27 | c(0.005, 0.005, 0.990))}) 28 | test_that("Test UCB",{ 29 | expect_equal(CalculateWeight(method = "UCB", 30 | method.par = list(epsilon = 0.01), 31 | all.event = data.frame(reward = 1:3, 32 | trial = rep(10, 3)), 33 | reward.family = "Bernoulli"), 34 | c(0, 0, 1))}) 35 | -------------------------------------------------------------------------------- /man/CalculateWeight.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CalculateWeight.R 3 | \name{CalculateWeight} 4 | \alias{CalculateWeight} 5 | \title{Calculate the probability of pulling each arm in the next period for various 6 | strategies} 7 | \usage{ 8 | CalculateWeight(method = "Thompson-Sampling", method.par = list(ndraws.TS = 9 | 1000), all.event, reward.family, sd.reward = NULL, period = 1, 10 | EXP3Info = NULL) 11 | } 12 | \arguments{ 13 | \item{method}{A character string choosing from "Epsilon-Greedy", 14 | 
"Epsilon-Decreasing", "Thompson-Sampling", 15 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 16 | "EXP3-Thompson-Sampling", 17 | "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 18 | See \code{\link{SimulateMultiplePeriods}} for more details. 19 | Default is "Thompson-Sampling".} 20 | 21 | \item{method.par}{A list of parameters needed for different methods: 22 | 23 | \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 24 | "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 25 | "Greedy-Bayes-Poisson-TS". 26 | 27 | \code{ndraws.TS}: A positive integer specifying the number of random draws 28 | from the posterior; 29 | needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 30 | "EXP3-Thompson-Sampling". Default is 1000. 31 | 32 | \code{EXP3}: A list consisting of two real numbers \code{eta} and 33 | \code{gamma}; 34 | \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 35 | "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 36 | 37 | \code{BP}: A list consisting of three postive integers \code{iter.BP}, 38 | \code{ndraws.BP} and \code{interval.BP}; 39 | needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 40 | "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 41 | to compute posterior; 42 | \code{ndraws.BP} specifies the number of posterior samples drawn from 43 | posterior distribution; \code{interval.BP} is specified to draw each 44 | posterior sample from 45 | a sample sequence of length \code{interval.BP}.} 46 | 47 | \item{all.event}{A data frame containing two columns \code{trial} and 48 | \code{reward} with the number of rows equal to the number of arms. 49 | Each element of \code{trial} and \code{reward} represents the number of 50 | trials and the total reward for each arm respectively.} 51 | 52 | \item{reward.family}{A character string specifying the distribution family 53 | of reward. 
Available distribution includes 54 | "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 55 | reward distribution, 56 | a vector of standard deviation should be provided in \code{sd.reward}.} 57 | 58 | \item{sd.reward}{A vector of non-negative numbers specifying standard 59 | deviation of each arm's reward distribution if "Gaussian" is chosen to be 60 | the reward distribution. Default to be NULL. 61 | See \code{reward.family}.} 62 | 63 | \item{period}{A positive integer specifying the period index. 64 | Default to be 1.} 65 | 66 | \item{EXP3Info}{A list of three vectors \code{prevWeight}, \code{EXP3Trial} 67 | and \code{EXP3Reward} with dimension equal to the number of arms, 68 | needed for "EXP3", "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS": 69 | 70 | \code{prevWeight}: the weight vector in the previous EXP3 iteration. 71 | 72 | \code{EXP3Trial} and \code{EXP3Reward}: vectors representing 73 | the number of trials and the total reward for each arm in 74 | the previous period respectively. 75 | 76 | See \code{\link{SimulateMultiplePeriods}} for more details.} 77 | } 78 | \value{ 79 | A normalized weight vector for future randomized allocation. 80 | } 81 | \description{ 82 | This function is aimed to compute the probability of pulling each arm for 83 | various methods in Multi-Armed Bandit given the total reward and the number 84 | of trials for each arm. 85 | } 86 | \examples{ 87 | ### Calculate weights using Thompson Sampling if reward follows Poisson 88 | distribution. 
89 | set.seed(100) 90 | CalculateWeight(method = "Thompson-Sampling", 91 | method.par = list(ndraws.TS = 1000), 92 | all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 93 | reward.family = "Poisson") 94 | ### Calculate weights using EXP3 95 | CalculateWeight(method = "EXP3", 96 | method.par = list(EXP3 = list(gamma = 0.01, eta =0.1)), 97 | all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 98 | reward.family = "Bernoulli", 99 | EXP3Info = list(prevWeight = rep(1, 3), 100 | EXP3Trial = rep(5, 3), 101 | EXP3Reward = 0:2)) 102 | } 103 | 104 | -------------------------------------------------------------------------------- /man/SimulateMultipleMethods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SimulateMultipleMethods.R 3 | \name{SimulateMultipleMethods} 4 | \alias{SimulateMultipleMethods} 5 | \title{Compare various strategies for Multi-Armed Bandit in stationary 6 | and non-stationary scenarios} 7 | \usage{ 8 | SimulateMultipleMethods(method = "Thompson-Sampling", 9 | method.par = list(ndraws.TS = 1000), iter, nburnin, nperiod, 10 | reward.mean.family, reward.family, narms.family, npulls.family, 11 | stationary = TRUE, nonstationary.type = NULL, data.par, 12 | regret.plot = FALSE) 13 | } 14 | \arguments{ 15 | \item{method}{A vector of character strings choosing from "Epsilon-Greedy", 16 | "Epsilon-Decreasing", "Thompson-Sampling", 17 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 18 | "EXP3-Thompson-Sampling", 19 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 20 | See \code{\link{SimulateMultiplePeriods}} for more details. 
21 | Default is "Thompson-Sampling".} 22 | 23 | \item{method.par}{A list of parameters needed for different methods: 24 | 25 | \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 26 | "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 27 | "Greedy-Bayes-Poisson-TS". 28 | 29 | \code{ndraws.TS}: A positive integer specifying 30 | the number of random draws from the posterior; 31 | needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" 32 | and "EXP3-Thompson-Sampling". Default is 1000. 33 | 34 | \code{EXP3}: A list consisting of two real numbers \code{eta} and 35 | \code{gamma}; 36 | \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 37 | "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 38 | 39 | \code{BP}: A list consisting of three postive integers \code{iter.BP}, 40 | \code{ndraws.BP} and \code{interval.BP}; 41 | needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" 42 | and "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number 43 | of iterations to compute posterior; 44 | \code{ndraws.BP} specifies the number of posterior samples drawn 45 | from posterior distribution; \code{interval.BP} is specified to 46 | draw each posterior sample from 47 | a sample sequence of length \code{interval.BP}. 48 | 49 | \code{HyperTS}: A list consisting of a vector \code{method.list}, 50 | needed for "HyperTS". \code{method.list} is a vector of character strings 51 | choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 52 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 53 | "EXP3-Thompson-Sampling", 54 | "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 
55 | "HyperTS" will construct an ensemble consisting all the methods 56 | in \code{method.list}.} 57 | 58 | \item{iter}{A positive integer specifying the number of iterations.} 59 | 60 | \item{nburnin}{A positive integer specifying the number of periods 61 | to allocate each arm equal traffic before applying any strategy.} 62 | 63 | \item{nperiod}{A positive integer specifying the number of periods 64 | to apply various strategies.} 65 | 66 | \item{reward.mean.family}{A character string specifying 67 | the distribution family to generate mean reward of each arm. 68 | Available distribution includes "Uniform", "Beta" and "Gaussian".} 69 | 70 | \item{reward.family}{A character string specifying the distribution family 71 | of reward. Available distribution includes 72 | "Bernoulli", "Poisson" and "Gaussian". 73 | If "Gaussian" is chosen to be the reward distribution, 74 | a vector of standard deviation should be provided in 75 | \code{sd.reward} in \code{data.par}.} 76 | 77 | \item{narms.family}{A character string specifying the distribution family 78 | of the number of arms. Available distribution includes "Poisson" and 79 | "Binomial".} 80 | 81 | \item{npulls.family}{A character string specifying the distribution family 82 | of the number of pulls per period. 83 | For continuous distribution, the number of pulls will be rounded up. 84 | Available distribution includes "Log-Normal" and "Poisson".} 85 | 86 | \item{stationary}{A logic value indicating whether a stationary 87 | Multi-Armed Bandit is considered (corresponding to the case that 88 | the reward mean is unchanged). Default to be TRUE.} 89 | 90 | \item{nonstationary.type}{A character string indicating 91 | how the mean reward varies. Available types include "Random Walk" and 92 | "Geometric Random Walk" 93 | (reward mean follows random walk in the log scale). 
Default to be NULL.} 94 | 95 | \item{data.par}{A list of data generating parameters: 96 | 97 | \code{reward.mean}: A list of parameters of \code{reward.mean.family}: 98 | \code{min} and \code{max} are two real numbers specifying 99 | the bounds when \eqn{reward.mean.family = "Uniform"}; \code{shape1} and 100 | \code{shape2} are two shape parameters when 101 | \eqn{reward.mean.family = "Beta"}; 102 | \code{mean} and \code{sd} specify mean and standard deviation 103 | when \eqn{reward.mean.family = "Gaussian"}. 104 | 105 | \code{reward.family}: A list of parameters of \code{reward.family}: 106 | \code{sd} is a vector of non-negative numbers specifying standard deviation 107 | of each arm's reward distribution 108 | if "Gaussian" is chosen to be the reward distribution. 109 | 110 | \code{narms.family}: A list of parameters of \code{narms.family}: 111 | \code{lambda} is a positive parameter specifying the mean when 112 | \eqn{narms.family = "Poisson"}; \code{size} and \code{prob} 113 | are 2 parameters needed when \eqn{narms.family = "Binomial"}. 114 | 115 | \code{npulls.family}: A list of parameters of \code{npulls.family}: 116 | \code{meanlog} and \code{sdlog} are 2 positive parameters specifying the mean 117 | and standard deviation in the log scale 118 | when \eqn{npulls.family = "Log-Normal"}; 119 | \code{lambda} is a positive parameter 120 | specifying the mean when \eqn{npulls.family = "Poisson"}. 121 | 122 | \code{nonstationary.family}: 123 | A list of parameters of \code{nonstationary.type}: 124 | \code{sd} is a positive parameter specifying the standard deviation 125 | of white noise 126 | when \eqn{nonstationary.type = "Random Walk"}; \code{sdlog} is 127 | a positive parameter specifying the log standard deviation of white noise 128 | when \eqn{nonstationary.type = "Geometric Random Walk"}.} 129 | 130 | \item{regret.plot}{A logic value indicating whether an average regret plot 131 | is returned. 
Default to be FALSE.} 132 | } 133 | \value{ 134 | a list consisting of: 135 | \item{regret.matrix}{A three-dimensional array with each dimension corresponding to the period, iteration and method.} 136 | \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 137 | } 138 | \description{ 139 | This function is aimed to simulate data in different scenarios to 140 | compare various strategies in Multi-Armed Bandit. 141 | Users can specify the distribution of the number of arms, 142 | the distribution of mean reward, the distribution of the number of pulls 143 | in one period and the stationariness to simulate different scenarios. 144 | Relative regret is returned and average relative regret plot is returned 145 | if needed. 146 | See \code{\link{SimulateMultiplePeriods}} for more details. 147 | } 148 | \examples{ 149 | ### Compare Epsilon-Greedy and Thompson Sampling in the stationary case. 150 | set.seed(100) 151 | res <- SimulateMultipleMethods( 152 | method = c("Epsilon-Greedy", "Thompson-Sampling"), 153 | method.par = list(epsilon = 0.1, ndraws.TS = 1000), 154 | iter = 100, 155 | nburnin = 30, 156 | nperiod = 180, 157 | reward.mean.family = "Uniform", 158 | reward.family = "Bernoulli", 159 | narms.family = "Poisson", 160 | npulls.family = "Log-Normal", 161 | data.par = list(reward.mean = list(min = 0, max = 0.1), 162 | npulls.family = list(meanlog = 3, sdlog = 1.5), 163 | narms.family = list(lambda = 5)), 164 | regret.plot = TRUE) 165 | res$regret.plot.object 166 | ### Compare Epsilon-Greedy, Thompson Sampling and EXP3 in the non-stationary case. 
167 | set.seed(100) 168 | res <- SimulateMultipleMethods( 169 | method = c("Epsilon-Greedy", "Thompson-Sampling", "EXP3"), 170 | method.par = list(epsilon = 0.1, 171 | ndraws.TS = 1000, 172 | EXP3 = list(gamma = 0, eta = 0.1)), 173 | iter = 100, 174 | nburnin = 30, 175 | nperiod = 90, 176 | reward.mean.family = "Beta", 177 | reward.family = "Bernoulli", 178 | narms.family = "Binomial", 179 | npulls.family = "Log-Normal", 180 | stationary = FALSE, 181 | nonstationary.type = "Geometric Random Walk", 182 | data.par = list(reward.mean = list(shape1 = 2, shape2 = 5), 183 | npulls.family = list(meanlog = 3, sdlog = 1), 184 | narms.family = list(size = 10, prob = 0.5), 185 | nonstationary.family = list(sdlog = 0.05)), 186 | regret.plot = TRUE) 187 | res$regret.plot.object 188 | } 189 | 190 | -------------------------------------------------------------------------------- /man/SimulateMultiplePeriods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SimulateMultiplePeriods.R 3 | \name{SimulateMultiplePeriods} 4 | \alias{SimulateMultiplePeriods} 5 | \title{Simulate strategies for Multi-Armed Bandit in multiple periods} 6 | \usage{ 7 | SimulateMultiplePeriods(method = "Thompson-Sampling", 8 | method.par = list(ndraws.TS = 1000), nburnin, nperiod, reward.family, 9 | mean.reward, sd.reward = NULL, npulls.per.period = 1, 10 | weight.plot = FALSE, regret.plot = FALSE) 11 | } 12 | \arguments{ 13 | \item{method}{A character string choosing from "Epsilon-Greedy", 14 | "Epsilon-Decreasing", "Thompson-Sampling", 15 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 16 | "EXP3-Thompson-Sampling", 17 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 18 | For details of these methods, see below. 
Default is "Thompson-Sampling".} 19 | 20 | \item{method.par}{A list of parameters needed for different methods: 21 | 22 | \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 23 | "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 24 | "Greedy-Bayes-Poisson-TS". 25 | 26 | \code{ndraws.TS}: A positive integer specifying the number of random draws 27 | from the posterior; 28 | needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 29 | "EXP3-Thompson-Sampling". Default is 1000. 30 | 31 | \code{EXP3}: A list consisting of two real numbers \code{eta} and 32 | \code{gamma}; 33 | \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 34 | "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 35 | 36 | \code{BP}: A list consisting of three postive integers \code{iter.BP}, 37 | \code{ndraws.BP} and \code{interval.BP}; 38 | needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 39 | "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 40 | to compute posterior; 41 | \code{ndraws.BP} specifies the number of posterior samples 42 | drawn from posterior distribution; \code{interval.BP} is specified to draw 43 | each posterior sample from a sample sequence of length \code{interval.BP}. 44 | 45 | \code{HyperTS}: A list consisting of a vector \code{method.list}, 46 | needed for "HyperTS". \code{method.list} is a vector of character strings 47 | choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 48 | "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 49 | "EXP3-Thompson-Sampling", "Greedy-Bayes-Poisson-TS" and 50 | "EXP3-Bayes-Poisson-TS". 
"HyperTS" will construct an ensemble consisting all 51 | the methods in \code{method.list}.} 52 | 53 | \item{nburnin}{A positive integer specifying the number of periods to 54 | allocate each arm equal traffic before applying any strategy.} 55 | 56 | \item{nperiod}{A positive integer specifying the number of periods 57 | to apply the strategy.} 58 | 59 | \item{reward.family}{A character string specifying the distribution family 60 | of reward. Available distribution includes 61 | "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 62 | reward distribution, 63 | a vector of standard deviation should be provided in \code{sd.reward}.} 64 | 65 | \item{mean.reward}{A vector or a matrix of real numbers specifying the mean 66 | reward of each arm. If \code{mean.reward} is a vector, each element is the 67 | mean reward for each arm and the mean reward of each arm is unchanged 68 | throughout all periods (corresponding to the stationary Multi-Armed Bandit). 69 | If \code{mean.reward} is a matrix, it should 70 | have (\code{nburnin} + \code{nperiod}) rows. The mean reward of each arm 71 | could change. Each row represents a mean reward vector for each period 72 | (corresponding to nonstationary and adversarial Multi-Armed Bandit).} 73 | 74 | \item{sd.reward}{A vector of non-negative numbers specifying 75 | standard deviation of each arm's reward distribution if "Gaussian" is chosen 76 | to be the reward distribution. Default to be NULL. 77 | See \code{reward.family}.} 78 | 79 | \item{npulls.per.period}{A positive integer or a vector of positive 80 | integers. Default value is 1. If \code{npulls.per.period} is a positive 81 | integer, the number of pulls is \code{npulls.per.period} for each period. 
82 | If \code{npulls.per.period} is a vector, each element represents 83 | the number of pulls for one period; the length of \code{npulls.per.period} 84 | should be equal to \code{nburnin} + \code{nperiod}.} 85 | 86 | \item{weight.plot}{A logic value with FALSE as default. If TRUE, weight plot 87 | object for each arm is returned.} 88 | 89 | \item{regret.plot}{A logic value with FALSE as default. If TRUE, relative 90 | regret plot object is returned.} 91 | } 92 | \value{ 93 | a list consisting of: 94 | \item{weight}{A weight matrix whose each element is the allocated weight 95 | for each arm and period. Each row represents one arm and each column 96 | represents one period.} 97 | \item{regret}{A relative regret vector whose each element is relative regret 98 | for each period. For definition of relative regret, see above.} 99 | \item{weight.plot.object}{If weight.plot = TRUE, a ggplot object is returned.} 100 | \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 101 | } 102 | \description{ 103 | This function is aimed to simulate data to run 104 | strategies of Multi-Armed Bandit in a sequence of periods. Weight plot 105 | and regret plot are provided if needed. In each period there could be 106 | multiple pulls and each method can only be applied once. The default setting 107 | is that in each period there is only 1 pull, corresponding to continuous 108 | updating. 109 | } 110 | \details{ 111 | Various methods have been implemented. "Epsilon-Greedy" and 112 | "Epsilon-Decreasing" allocates \eqn{1 - epsilon} traffic to the arm which has 113 | the largest average reward and equally distribute the traffic 114 | to other arms. For "Epsilon-Greedy" epsilon in \code{method.par} serves as 115 | constant exploration rate . For "Epsilon-Decreasing" epsilon in 116 | \code{method.par} serves as exploration rate at period 1, 117 | while in period \eqn{t} exploration rate is \eqn{epsilon / t}. 
118 | See \url{https://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions} 119 | for more details about these strategies. 120 | 121 | "Thompson-Sampling" refers to Beta-Binomial Thompson Sampling using 122 | Beta(1, 1) as a prior. "Bayes-Poisson-TS" refers to Poisson-Gamma Thompson 123 | Sampling using a Bayesian Generalized Linear 124 | Mixed Effects Model to compute weights. "Bayes-Poisson-TS", 125 | "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS" depends on the package 126 | "emre" to compute posterior distribution. For algorithm 127 | details, see the paper \url{https://arxiv.org/abs/1602.00047}. 128 | 129 | UCB (Upper Confidence Bound) is a classical method for Multi-Armed Bandit. 130 | For algorithm details, see the paper 131 | \url{http://personal.unileoben.ac.at/rortner/Pubs/UCBRev.pdf}. 132 | EXP3 is a method which needs to specify exploration rate \code{gamma} and 133 | exploitation rate \code{eta}. For algorithm details, see the paper 134 | \url{https://cseweb.ucsd.edu/~yfreund/papers/bandits.pdf}. 135 | 136 | Ensemble methods are also implemented. "Greedy-Thompson-Sampling" and 137 | "Greedy-Bayes-Poisson-TS" allocate \eqn{1 - epsilon} traffic to the arm 138 | corresponding to the largest 139 | Thompson sampling weight and allocate \eqn{epsilon} traffic 140 | corresponding to Thompson sampling weights. 141 | Instead of using average reward for each period to update weights in "EXP3", 142 | "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS" use Thompson sampling 143 | weights in the updating formula in "EXP3". 144 | "HyperTS" is an ensemble by applying Thompson Sampling to selecting the best 145 | method in each period based on previous performance. For algorithm details, 146 | see the paper 147 | \url{http://yxjiang.github.io/paper/RecSys2014-ensemble-bandit.pdf}. 148 | 149 | To measure the performance. 
Regret is computed by summing over the products 150 | of the number of pulls on one arm at one period and 151 | the difference of the mean reward of that arm compared with the largest one. 152 | Relative regret is 153 | computed by dividing the regret of a certain method over the regret of the 154 | benchmark method that allocates equal weights to each arm 155 | throughout all the periods. 156 | } 157 | \examples{ 158 | ### Simulate Thompson-Sampling 159 | set.seed(100) 160 | res <- SimulateMultiplePeriods(method = "Thompson-Sampling", 161 | method.par = list(ndraws.TS = 1000), 162 | nburnin = 30, 163 | nperiod = 180, 164 | npulls.per.period = 5, 165 | reward.family = "Bernoulli", 166 | mean.reward = runif(3, 0, 0.1), 167 | weight.plot = TRUE) 168 | res$weight.plot.object 169 | ### Simulate EXP3-Thompson-Sampling 170 | set.seed(100) 171 | res <- SimulateMultiplePeriods( 172 | method = "EXP3-Thompson-Sampling", 173 | method.par = list(ndraws.TS = 1000, 174 | EXP3 = list(gamma = 0, eta = 0.1)), 175 | nburnin = 30, 176 | nperiod = 180, 177 | npulls.per.period = 5, 178 | reward.family = "Bernoulli", 179 | mean.reward = runif(3, 0, 0.1), 180 | weight.plot = TRUE) 181 | res$weight.plot.object 182 | ### Simulate ensemble method HyperTS given "Thompson-Sampling", "Epsilon-Greedy" and "Epsilon-Decreasing" 183 | set.seed(100) 184 | res <- SimulateMultiplePeriods( 185 | method = "HyperTS", 186 | method.par = list( 187 | ndraws.TS = 1000, 188 | epsilon = 0.1, 189 | HyperTS = list(method.list = c("Thompson-Sampling", 190 | "Epsilon-Greedy", 191 | "Epsilon-Decreasing"))), 192 | nburnin = 30, 193 | nperiod = 180, 194 | npulls.per.period = 5, 195 | reward.family = "Poisson", 196 | mean.reward = runif(3, 0, 0.1), 197 | weight.plot = TRUE) 198 | res$weight.plot.object 199 | } 200 | 201 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | 
Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /R/CalculateWeight.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Calculate the probability of pulling each arm in the next period for various 15 | #' strategies 16 | #' 17 | #' This function is aimed to compute the probability of pulling each arm for 18 | #' various methods in Multi-Armed Bandit given the total reward and the number 19 | #' of trials for each arm. 20 | #' 21 | #' @param method A character string choosing from "Epsilon-Greedy", 22 | #' "Epsilon-Decreasing", "Thompson-Sampling", 23 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 24 | #' "EXP3-Thompson-Sampling", 25 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 26 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 27 | #' Default is "Thompson-Sampling". 28 | #' @param method.par A list of parameters needed for different methods: 29 | #' 30 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 31 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 32 | #' "Greedy-Bayes-Poisson-TS". 33 | #' 34 | #' \code{ndraws.TS}: A positive integer specifying the number of random draws 35 | #' from the posterior; 36 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 37 | #' "EXP3-Thompson-Sampling". Default is 1000. 38 | #' 39 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and 40 | #' \code{gamma}; 41 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 42 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 
43 | #' 44 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 45 | #' \code{ndraws.BP} and \code{interval.BP}; 46 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 47 | #' "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 48 | #' to compute posterior; 49 | #' \code{ndraws.BP} specifies the number of posterior samples drawn from 50 | #' posterior distribution; \code{interval.BP} is specified to draw each 51 | #' posterior sample from 52 | #' a sample sequence of length \code{interval.BP}. 53 | #' @param all.event A data frame containing two columns \code{trial} and 54 | #' \code{reward} with the number of rows equal to the number of arms. 55 | #' Each element of \code{trial} and \code{reward} represents the number of 56 | #' trials and the total reward for each arm respectively. 57 | #' @param reward.family A character string specifying the distribution family 58 | #' of reward. Available distribution includes 59 | #' "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 60 | #' reward distribution, 61 | #' a vector of standard deviation should be provided in \code{sd.reward}. 62 | #' @param sd.reward A vector of non-negative numbers specifying standard 63 | #' deviation of each arm's reward distribution if "Gaussian" is chosen to be 64 | #' the reward distribution. Default to be NULL. 65 | #' See \code{reward.family}. 66 | #' @param period A positive integer specifying the period index. 67 | #' Default to be 1. 68 | #' @param EXP3Info A list of three vectors \code{prevWeight}, \code{EXP3Trial} 69 | #' and \code{EXP3Reward} with dimension equal to the number of arms, 70 | #' needed for "EXP3", "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS": 71 | #' 72 | #' \code{prevWeight}: the weight vector in the previous EXP3 iteration. 
73 | #' 74 | #' \code{EXP3Trial} and \code{EXP3Reward}: vectors representing 75 | #' the number of trials and the total reward for each arm in 76 | #' the previous period respectively. 77 | #' 78 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 79 | #' @return A normalized weight vector for future randomized allocation. 80 | #' @export 81 | #' @examples 82 | #' ### Calculate weights using Thompson Sampling if reward follows Poisson 83 | #' distribution. 84 | #' set.seed(100) 85 | #' CalculateWeight(method = "Thompson-Sampling", 86 | #' method.par = list(ndraws.TS = 1000), 87 | #' all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 88 | #' reward.family = "Poisson") 89 | #' ### Calculate weights using EXP3 90 | #' CalculateWeight(method = "EXP3", 91 | #' method.par = list(EXP3 = list(gamma = 0.01, eta =0.1)), 92 | #' all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 93 | #' reward.family = "Bernoulli", 94 | #' EXP3Info = list(prevWeight = rep(1, 3), 95 | #' EXP3Trial = rep(5, 3), 96 | #' EXP3Reward = 0:2)) 97 | 98 | CalculateWeight <- function(method = "Thompson-Sampling", 99 | method.par = list(ndraws.TS = 1000), 100 | all.event, 101 | reward.family, 102 | sd.reward = NULL, 103 | period = 1, 104 | EXP3Info = NULL){ 105 | method.name <- c("Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 106 | "EXP3", "UCB", "Bayes-Poisson-TS", 107 | "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling", 108 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS") 109 | if (! method %in% method.name){ 110 | stop("Please specify correct method names!") 111 | } 112 | if (! reward.family %in% c("Bernoulli", "Poisson", "Gaussian")){ 113 | stop("Please specify correct reward family!") 114 | } 115 | if (method == method.name[1]){ 116 | if (! 
is.number(method.par$epsilon) ){ 117 | stop("Please specify correct parameters for Epsilon-Greedy!") 118 | } 119 | eps <- method.par$epsilon 120 | rate <- ifelse(all.event$trial == 0, 0, all.event$reward / all.event$trial) 121 | n <- length(rate) 122 | maxIdx <- which(rate == max(rate)) 123 | maxCount <- length(maxIdx) 124 | weight <- rep(eps / (n - maxCount), n) 125 | weight[maxIdx] <- (1 - eps) / maxCount 126 | return(weight) 127 | } 128 | 129 | if (method == method.name[2]){ 130 | if (! is.number(method.par$epsilon)){ 131 | stop("Please specify correct parameters for Epsilon-Decreasing!") 132 | } 133 | eps <- method.par$epsilon / period 134 | rate <- ifelse(all.event$trial == 0, 0, all.event$reward / all.event$trial) 135 | n <- length(rate) 136 | maxIdx <- which(rate == max(rate)) 137 | maxCount <- length(maxIdx) 138 | weight <- rep(eps / (n - maxCount), n) 139 | weight[maxIdx] <- (1 - eps) / maxCount 140 | return(weight) 141 | } 142 | 143 | if (method == method.name[3] | 144 | method == method.name[7] | 145 | method == method.name[8]){ 146 | if (! 
is.number(method.par$ndraws.TS)){ 147 | stop("Please specify correct parameters for Thompson-Sampling!") 148 | } 149 | ndraws.TS <- method.par$ndraws.TS 150 | reward <- all.event$reward 151 | trial <- all.event$trial 152 | n <- length(reward) 153 | ans <- matrix(nrow = ndraws.TS, ncol = n) 154 | if (reward.family == "Bernoulli"){ 155 | failure <- trial - reward 156 | for (i in 1:n) ans[ ,i] <- rbeta(ndraws.TS, shape1 = reward[i] + 1, 157 | shape2 = failure[i] + 1) 158 | } 159 | if (reward.family == "Gaussian"){ 160 | for (i in 1:n) ans[ ,i] <- rnorm(ndraws.TS, mean = reward[i] / trial[i], 161 | sd = sd.reward[i] / sqrt(trial[i])) 162 | } 163 | if (reward.family == "Poisson"){ 164 | for (i in 1:n) ans[ ,i] <- rgamma(ndraws.TS, shape = reward[i] + 1, 165 | scale = 1 / trial[i]) 166 | } 167 | w <- table(factor(max.col(ans), levels = 1:n)) 168 | if (method == method.name[3]){ 169 | return(as.vector(w / sum(w))) 170 | } 171 | if (method == method.name[7]){ 172 | if (! is.number(method.par$epsilon)){ 173 | stop("Please specify correct parameters for Greedy-Thompson-Sampling!") 174 | } 175 | n <- length(w) 176 | maxIdx <- which(w == max(w)) 177 | maxCount <- length(maxIdx) 178 | maxVector <- rep(0, n) 179 | maxVector[maxIdx] <- 1 / maxCount 180 | eps <- method.par$epsilon 181 | return(as.vector(eps * w / sum(w) + (1 - eps) * maxVector)) 182 | } 183 | if (method == method.name[8]){ 184 | if ( ! is.number(method.par$EXP3$gamma) | 185 | ! is.number(method.par$EXP3$eta) ){ 186 | stop("Please specify correct parameters for EXP3-Thompson-Sampling!") 187 | } 188 | if (reward.family != "Bernoulli"){ 189 | stop("Please use Bernoulli Reward Family to run EXP3") 190 | } 191 | eta <- method.par$EXP3$eta 192 | gamma <- method.par$EXP3$gamma 193 | prevWeight <- EXP3Info$prevWeight 194 | temp <- prevWeight * exp(eta * w / sum(w)) 195 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / n)) 196 | } 197 | } 198 | 199 | if (method == method.name[4]){ 200 | if (! 
is.number(method.par$EXP3$gamma) | ! is.number(method.par$EXP3$eta)){ 201 | stop("Please specify correct parameters for EXP3!") 202 | } 203 | if (reward.family != "Bernoulli"){ 204 | stop("Please use Bernoulli Reward Family to run EXP3") 205 | } 206 | prevWeight <- EXP3Info$prevWeight 207 | EXP3Trial <- EXP3Info$EXP3Trial 208 | EXP3Reward <- EXP3Info$EXP3Reward 209 | eta <- method.par$EXP3$eta 210 | gamma <- method.par$EXP3$gamma 211 | EXP3Rate <- ifelse(EXP3Trial == 0, 0, EXP3Reward / EXP3Trial) 212 | temp <- prevWeight * 213 | exp(eta * (EXP3Rate - max(EXP3Rate)) * sum(EXP3Trial) / 214 | length(EXP3Trial)) 215 | result <- as.vector((1 - gamma) * temp / sum(temp) + 216 | gamma /length(EXP3Trial)) 217 | return(result) 218 | } 219 | 220 | if (method == method.name[5]){ 221 | reward <- all.event$reward 222 | trial <- all.event$trial 223 | rate <- reward / trial 224 | UCB <- rate + sqrt(2 * log(period) / trial) 225 | maxIdx <- which(UCB == max(UCB)) 226 | maxCount <- length(maxIdx) 227 | maxVector <- rep(0, length(reward)) 228 | maxVector[maxIdx] <- 1 / maxCount 229 | return(maxVector) 230 | } 231 | 232 | if (method == method.name[6] | 233 | method == method.name[9] | 234 | method == method.name[10]){ 235 | if ( ! is.number(method.par$BP$iter.BP) | 236 | ! is.number(method.par$BP$ndraws.BP) | 237 | ! 
is.number(method.par$BP$interval.BP)){ 238 | stop("Please specify correct parameters for Bayes-Poisson-TS!") 239 | } 240 | if (reward.family == "Gaussian"){ 241 | stop("Please not use Gaussian Reward Family to run Bayes-Poisson-TS!") 242 | } 243 | iter.BP <- method.par$BP$iter.BP 244 | ndraws.BP <- method.par$BP$ndraws.BP 245 | interval.BP <- method.par$BP$interval.BP 246 | n <- length(all.event$trial) 247 | temp <- all.event 248 | temp$Id <- sapply(1:n, function(x) paste("Arm", x)) 249 | mdl <- SetupEMREoptim( 250 | "reward ~ 1 + (1|Id) + offset(trial)", 251 | data = temp, model.constructor = PoissonEMRE, 252 | burnin = iter.BP - ndraws.BP * interval.BP, 253 | thinning.interval = 1, llik.interval = 1) 254 | mdlResult <- FitEMRE(mdl, max.iter = iter.BP, debug = FALSE) 255 | posterior <- mdlResult$snapshots 256 | useIdx <- seq(1, ndraws.BP * interval.BP, interval.BP) 257 | armPos <- posterior[["1__Id"]][useIdx, ] 258 | biasPos <- posterior[["__bias__"]][useIdx, ] 259 | winnerPred <- sapply(1:length(biasPos), function(k) { 260 | biasK <- biasPos[k] 261 | armK <- armPos[k, ] 262 | predK <- biasK * armK 263 | return(which(predK == max(predK))[1]) 264 | }) 265 | w <- table(factor(winnerPred, levels = 1:n)) 266 | 267 | if (method == method.name[6]){ 268 | return(as.vector(w / sum(w))) 269 | } 270 | 271 | if (method == method.name[9]){ 272 | if ( ! is.number(method.par$epsilon)){ 273 | stop("Please specify correct parameters for Greedy-Bayes-Poisson-TS!") 274 | } 275 | n <- length(w) 276 | maxIdx <- which(w == max(w)) 277 | maxCount <- length(maxIdx) 278 | maxVector <- rep(0, n) 279 | maxVector[maxIdx] <- 1 / maxCount 280 | eps <- method.par$epsilon 281 | return(as.vector(eps * w / sum(w) + (1 - eps) * maxVector)) 282 | } 283 | 284 | if (method == method.name[10]){ 285 | if ( ! is.number(method.par$EXP3$gamma) | 286 | ! 
is.number(method.par$EXP3$eta) ){ 287 | stop("Please specify correct parameters for EXP3-Bayes-Poisson-TS!") 288 | } 289 | if (reward.family != "Bernoulli"){ 290 | stop("Please use Bernoulli Reward Family to run Bayes-Poisson-TS!") 291 | } 292 | eta <- method.par$EXP3$eta 293 | gamma <- method.par$EXP3$gamma 294 | prevWeight <- EXP3Info$prevWeight 295 | temp <- prevWeight * exp(eta * w / sum(w)) 296 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / n)) 297 | } 298 | } 299 | } 300 | -------------------------------------------------------------------------------- /R/CalculateWeight.R~: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Calculate the probability of pulling each arm in the next period for various 15 | #' strategies 16 | #' 17 | #' This function is aimed to compute the probability of pulling each arm for 18 | #' various methods in Multi-Armed Bandit given the total reward and the number 19 | #' of trials for each arm. 20 | #' 21 | #' @param method A character string choosing from "Epsilon-Greedy", 22 | #' "Epsilon-Decreasing", "Thompson-Sampling", 23 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 24 | #' "EXP3-Thompson-Sampling", 25 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 26 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 
27 | #' Default is "Thompson-Sampling". 28 | #' @param method.par A list of parameters needed for different methods: 29 | #' 30 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 31 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and "Greedy-Bayes-Poisson-TS". 32 | #' 33 | #' \code{ndraws.TS}: A positive integer specifying the number of random draws 34 | #' from the posterior; 35 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 36 | #' "EXP3-Thompson-Sampling". Default is 1000. 37 | #' 38 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and \code{gamma}; 39 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 40 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 41 | #' 42 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 43 | #' \code{ndraws.BP} and \code{interval.BP}; 44 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 45 | #' "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 46 | #' to compute posterior; 47 | #' \code{ndraws.BP} specifies the number of posterior samples drawn from 48 | #' posterior distribution; \code{interval.BP} is specified to draw each 49 | #' posterior sample from 50 | #' a sample sequence of length \code{interval.BP}. 51 | #' @param all.event A data frame containing two columns \code{trial} and 52 | #' \code{reward} with the number of rows equal to the number of arms. 53 | #' Each element of \code{trial} and \code{reward} represents the number of trials 54 | #' and the total reward for each arm respectively. 55 | #' @param reward.family A character string specifying the distribution family 56 | #' of reward. Available distribution includes 57 | #' "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 58 | #' reward distribution, 59 | #' a vector of standard deviation should be provided in \code{sd.reward}. 
60 | #' @param sd.reward A vector of non-negative numbers specifying standard 61 | #' deviation of each arm's reward distribution if "Gaussian" is chosen to be 62 | #' the reward distribution. Default to be NULL. 63 | #' See \code{reward.family}. 64 | #' @param period A positive integer specifying the period index. Default to be 1. 65 | #' @param EXP3Info A list of three vectors \code{prevWeight}, \code{EXP3Trial} 66 | #' and \code{EXP3Reward} with dimension equal to the number of arms, 67 | #' needed for "EXP3", "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS": 68 | #' 69 | #' \code{prevWeight}: the weight vector in the previous EXP3 iteration. 70 | #' 71 | #' \code{EXP3Trial} and \code{EXP3Reward}: vectors representing 72 | #' the number of trials and the total reward for each arm in 73 | #' the previous period respectively. 74 | #' 75 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 76 | #' @return A normalized weight vector for future randomized allocation. 77 | #' @export 78 | #' @examples 79 | #' ### Calculate weights using Thompson Sampling if reward follows Poisson distribution 80 | #' set.seed(100) 81 | #' CalculateWeight(method = "Thompson-Sampling", 82 | #' method.par = list(ndraws.TS = 1000), 83 | #' all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 84 | #' reward.family = "Poisson") 85 | #' ### Calculate weights using EXP3 86 | #' CalculateWeight(method = "EXP3", 87 | #' method.par = list(EXP3 = list(gamma = 0.01, eta =0.1)), 88 | #' all.event = data.frame(reward = 1:3, trial = rep(10, 3)), 89 | #' reward.family = "Bernoulli", 90 | #' EXP3Info = list(prevWeight = rep(1, 3), EXP3Trial = rep(5, 3), EXP3Reward = 0:2)) 91 | 92 | 93 | 94 | CalculateWeight <- function(method = "Thompson-Sampling", 95 | method.par = list(ndraws.TS = 1000), 96 | all.event, 97 | reward.family, 98 | sd.reward = NULL, 99 | period = 1, 100 | EXP3Info = NULL){ 101 | method.name <- c("Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 102 | 
"EXP3", "UCB", "Bayes-Poisson-TS", 103 | "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling", 104 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS") 105 | if (! method %in% method.name){ 106 | stop("Please specify correct method names!") 107 | } 108 | if (! reward.family %in% c("Bernoulli", "Poisson", "Gaussian")){ 109 | stop("Please specify correct reward family!") 110 | } 111 | 112 | 113 | if (method == method.name[1]){ 114 | if (! is.number(method.par$epsilon) ){ 115 | stop("Please specify correct parameters for Epsilon-Greedy!") 116 | } 117 | eps <- method.par$epsilon 118 | rate <- ifelse(all.event$trial == 0, 0, all.event$reward / all.event$trial) 119 | n <- length(rate) 120 | maxIdx <- which(rate == max(rate)) 121 | maxCount <- length(maxIdx) 122 | weight <- rep(eps / (n - maxCount), n) 123 | weight[maxIdx] <- (1 - eps) / maxCount 124 | return(weight) 125 | } 126 | 127 | 128 | if (method == method.name[2]){ 129 | if ( ! is.number(method.par$epsilon)){ 130 | stop("Please specify correct parameters for Epsilon-Decreasing!") 131 | } 132 | eps <- method.par$epsilon / period 133 | rate <- ifelse(all.event$trial == 0, 0, all.event$reward / all.event$trial) 134 | n <- length(rate) 135 | maxIdx <- which(rate == max(rate)) 136 | maxCount <- length(maxIdx) 137 | weight <- rep(eps / (n - maxCount), n) 138 | weight[maxIdx] <- (1 - eps) / maxCount 139 | return(weight) 140 | } 141 | 142 | 143 | if (method == method.name[3] | 144 | method == method.name[7] | 145 | method == method.name[8]){ 146 | if (! 
is.number(method.par$ndraws.TS)){ 147 | stop("Please specify correct parameters for Thompson-Sampling!") 148 | } 149 | ndraws.TS <- method.par$ndraws.TS 150 | reward <- all.event$reward 151 | trial <- all.event$trial 152 | n <- length(reward) 153 | ans <- matrix(nrow = ndraws.TS, ncol = n) 154 | if (reward.family == "Bernoulli"){ 155 | failure <- trial - reward 156 | for (i in 1:n) ans[ ,i] <- rbeta(ndraws.TS, shape1 = reward[i] + 1, 157 | shape2 = failure[i] + 1) 158 | } 159 | if (reward.family == "Gaussian"){ 160 | for (i in 1:n) ans[ ,i] <- rnorm(ndraws.TS, mean = reward[i] / trial[i], 161 | sd = sd.reward[i] / sqrt(trial[i])) 162 | } 163 | if (reward.family == "Poisson"){ 164 | for (i in 1:n) ans[ ,i] <- rgamma(ndraws.TS, shape = reward[i] + 1, 165 | scale = 1 / trial[i]) 166 | } 167 | 168 | w <- table(factor(max.col(ans), levels = 1:n)) 169 | 170 | 171 | if (method == method.name[3]){ 172 | return(as.vector(w / sum(w))) 173 | } 174 | 175 | if (method == method.name[7]){ 176 | if (! is.number(method.par$epsilon)){ 177 | stop("Please specify correct parameters for Greedy-Thompson-Sampling!") 178 | } 179 | n <- length(w) 180 | maxIdx <- which(w == max(w)) 181 | maxCount <- length(maxIdx) 182 | maxVector <- rep(0, n) 183 | maxVector[maxIdx] <- 1 / maxCount 184 | eps <- method.par$epsilon 185 | return(as.vector(eps * w / sum(w) + (1 - eps) * maxVector)) 186 | } 187 | 188 | if (method == method.name[8]){ 189 | if ( ! is.number(method.par$EXP3$gamma) | 190 | ! 
is.number(method.par$EXP3$eta) ){ 191 | stop("Please specify correct parameters for EXP3-Thompson-Sampling!") 192 | } 193 | if (reward.family != "Bernoulli"){ 194 | stop("Please use Bernoulli Reward Family to run EXP3") 195 | } 196 | 197 | eta <- method.par$EXP3$eta 198 | gamma <- method.par$EXP3$gamma 199 | prevWeight <- EXP3Info$prevWeight 200 | temp <- prevWeight * exp(eta * w / sum(w)) 201 | 202 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / n)) 203 | } 204 | } 205 | 206 | 207 | if (method == method.name[4]){ 208 | if (! is.number(method.par$EXP3$gamma) | ! is.number(method.par$EXP3$eta)){ 209 | stop("Please specify correct parameters for EXP3!") 210 | } 211 | if (reward.family != "Bernoulli"){ 212 | stop("Please use Bernoulli Reward Family to run EXP3") 213 | } 214 | prevWeight <- EXP3Info$prevWeight 215 | EXP3Trial <- EXP3Info$EXP3Trial 216 | EXP3Reward <- EXP3Info$EXP3Reward 217 | eta <- method.par$EXP3$eta 218 | gamma <- method.par$EXP3$gamma 219 | 220 | EXP3Rate <- ifelse(EXP3Trial == 0, 0, EXP3Reward / EXP3Trial) 221 | temp <- prevWeight * 222 | exp(eta * (EXP3Rate - max(EXP3Rate)) * sum(EXP3Trial) / length(EXP3Trial)) 223 | 224 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / length(EXP3Trial))) 225 | } 226 | 227 | 228 | if (method == method.name[5]){ 229 | reward <- all.event$reward 230 | trial <- all.event$trial 231 | rate <- reward / trial 232 | UCB <- rate + sqrt(2 * log(period) / trial) 233 | maxIdx <- which(UCB == max(UCB)) 234 | maxCount <- length(maxIdx) 235 | maxVector <- rep(0, length(reward)) 236 | maxVector[maxIdx] <- 1 / maxCount 237 | return(maxVector) 238 | } 239 | 240 | if (method == method.name[6] | 241 | method == method.name[9] | 242 | method == method.name[10]){ 243 | if ( ! is.number(method.par$BP$iter.BP) | 244 | ! is.number(method.par$BP$ndraws.BP) | 245 | ! 
is.number(method.par$BP$interval.BP)){ 246 | stop("Please specify correct parameters for Bayes-Poisson-TS!") 247 | } 248 | if (reward.family == "Gaussian"){ 249 | stop("Please not use Gaussian Reward Family to run Bayes-Poisson-TS!") 250 | } 251 | iter.BP <- method.par$BP$iter.BP 252 | ndraws.BP <- method.par$BP$ndraws.BP 253 | interval.BP <- method.par$BP$interval.BP 254 | 255 | n <- length(all.event$trial) 256 | temp <- all.event 257 | temp$Id <- sapply(1:n, function(x) paste("Arm", x)) 258 | 259 | mdl <- SetupEMREoptim( 260 | "reward ~ 1 + (1|Id) + offset(trial)", 261 | data = temp, model.constructor = PoissonEMRE, 262 | burnin = iter.BP - ndraws.BP * interval.BP, 263 | thinning.interval = 1, llik.interval = 1) 264 | mdlResult <- FitEMRE(mdl, max.iter = iter.BP, debug = FALSE) 265 | posterior <- mdlResult$snapshots 266 | useIdx <- seq(1, ndraws.BP * interval.BP, interval.BP) 267 | armPos <- posterior[["1__Id"]][useIdx, ] 268 | biasPos <- posterior[["__bias__"]][useIdx, ] 269 | winnerPred <- sapply(1:length(biasPos), function(k) { 270 | biasK <- biasPos[k] 271 | armK <- armPos[k, ] 272 | predK <- biasK * armK 273 | return(which(predK == max(predK))[1]) 274 | }) 275 | w <- table(factor(winnerPred, levels = 1:n)) 276 | 277 | if (method == method.name[6]){ 278 | return(as.vector(w / sum(w))) 279 | } 280 | 281 | if (method == method.name[9]){ 282 | if ( ! is.number(method.par$epsilon)){ 283 | stop("Please specify correct parameters for Greedy-Bayes-Poisson-TS!") 284 | } 285 | n <- length(w) 286 | maxIdx <- which(w == max(w)) 287 | maxCount <- length(maxIdx) 288 | maxVector <- rep(0, n) 289 | maxVector[maxIdx] <- 1 / maxCount 290 | eps <- method.par$epsilon 291 | return(as.vector(eps * w / sum(w) + (1 - eps) * maxVector)) 292 | } 293 | 294 | if (method == method.name[10]){ 295 | if ( ! is.number(method.par$EXP3$gamma) | 296 | ! 
is.number(method.par$EXP3$eta) ){ 297 | stop("Please specify correct parameters for EXP3-Bayes-Poisson-TS!") 298 | } 299 | if (reward.family != "Bernoulli"){ 300 | stop("Please use Bernoulli Reward Family to run Bayes-Poisson-TS!") 301 | } 302 | 303 | eta <- method.par$EXP3$eta 304 | gamma <- method.par$EXP3$gamma 305 | prevWeight <- EXP3Info$prevWeight 306 | temp <- prevWeight * exp(eta * w / sum(w)) 307 | 308 | return(as.vector((1 - gamma) * temp / sum(temp) + gamma / n)) 309 | } 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /R/SimulateMultipleMethods.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Compare various strategies for Multi-Armed Bandit in stationary 15 | #' and non-stationary scenarios 16 | #' 17 | #' This function is aimed to simulate data in different scenarios to 18 | #' compare various strategies in Multi-Armed Bandit. 19 | #' Users can specify the distribution of the number of arms, 20 | #' the distribution of mean reward, the distribution of the number of pulls 21 | #' in one period and the stationariness to simulate different scenarios. 22 | #' Relative regret is returned and average relative regret plot is returned 23 | #' if needed. 24 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 
25 | #' @param method A vector of character strings choosing from "Epsilon-Greedy", 26 | #' "Epsilon-Decreasing", "Thompson-Sampling", 27 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 28 | #' "EXP3-Thompson-Sampling", 29 | #' "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 30 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 31 | #' Default is "Thompson-Sampling". 32 | #' @param method.par A list of parameters needed for different methods: 33 | #' 34 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 35 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 36 | #' "Greedy-Bayes-Poisson-TS". 37 | #' 38 | #' \code{ndraws.TS}: A positive integer specifying 39 | #' the number of random draws from the posterior; 40 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" 41 | #' and "EXP3-Thompson-Sampling". Default is 1000. 42 | #' 43 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and 44 | #' \code{gamma}; 45 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 46 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 47 | #' 48 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 49 | #' \code{ndraws.BP} and \code{interval.BP}; 50 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" 51 | #' and "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number 52 | #' of iterations to compute posterior; 53 | #' \code{ndraws.BP} specifies the number of posterior samples drawn 54 | #' from posterior distribution; \code{interval.BP} is specified to 55 | #' draw each posterior sample from 56 | #' a sample sequence of length \code{interval.BP}. 57 | #' 58 | #' \code{HyperTS}: A list consisting of a vector \code{method.list}, 59 | #' needed for "HyperTS". 
\code{method.list} is a vector of character strings 60 | #' choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 61 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 62 | #' "EXP3-Thompson-Sampling", 63 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 64 | #' "HyperTS" will construct an ensemble consisting all the methods 65 | #' in \code{method.list}. 66 | #' @param iter A positive integer specifying the number of iterations. 67 | #' @param nburnin A positive integer specifying the number of periods 68 | #' to allocate each arm equal traffic before applying any strategy. 69 | #' @param nperiod A positive integer specifying the number of periods 70 | #' to apply various strategies. 71 | #' 72 | #' @param reward.mean.family A character string specifying 73 | #' the distribution family to generate mean reward of each arm. 74 | #' Available distribution includes "Uniform", "Beta" and "Gaussian". 75 | #' @param reward.family A character string specifying the distribution family 76 | #' of reward. Available distribution includes 77 | #' "Bernoulli", "Poisson" and "Gaussian". 78 | #' If "Gaussian" is chosen to be the reward distribution, 79 | #' a vector of standard deviation should be provided in 80 | #' \code{sd.reward} in \code{data.par}. 81 | #' @param narms.family A character string specifying the distribution family 82 | #' of the number of arms. Available distribution includes "Poisson" and 83 | #' "Binomial". 84 | #' @param npulls.family A character string specifying the distribution family 85 | #' of the number of pulls per period. 86 | #' For continuous distribution, the number of pulls will be rounded up. 87 | #' Available distribution includes "Log-Normal" and "Poisson". 88 | #' @param stationary A logic value indicating whether a stationary 89 | #' Multi-Armed Bandit is considered (corresponding to the case that 90 | #' the reward mean is unchanged). Default to be TRUE. 
91 | #' @param nonstationary.type A character string indicating 92 | #' how the mean reward varies. Available types include "Random Walk" and 93 | #' "Geometric Random Walk" 94 | #' (reward mean follows random walk in the log scale). Default to be NULL. 95 | #' @param data.par A list of data generating parameters: 96 | #' 97 | #' \code{reward.mean}: A list of parameters of \code{reward.mean.family}: 98 | #' \code{min} and \code{max} are two real numbers specifying 99 | #' the bounds when \eqn{reward.mean.family = "Uniform"}; \code{shape1} and 100 | #' \code{shape2} are two shape parameters when 101 | #' \eqn{reward.mean.family = "Beta"}; 102 | #' \code{mean} and \code{sd} specify mean and standard deviation 103 | #' when \eqn{reward.mean.family = "Gaussian"}. 104 | #' 105 | #' \code{reward.family}: A list of parameters of \code{reward.family}: 106 | #' \code{sd} is a vector of non-negative numbers specifying standard deviation 107 | #' of each arm's reward distribution 108 | #' if "Gaussian" is chosen to be the reward distribution. 109 | #' 110 | #' \code{narms.family}: A list of parameters of \code{narms.family}: 111 | #' \code{lambda} is a positive parameter specifying the mean when 112 | #' \eqn{narms.family = "Poisson"}; \code{size} and \code{prob} 113 | #' are 2 parameters needed when \eqn{narms.family = "Binomial"}. 114 | #' 115 | #' \code{npulls.family}: A list of parameters of \code{npulls.family}: 116 | #' \code{meanlog} and \code{sdlog} are 2 positive parameters specifying the mean 117 | #' and standard deviation in the log scale 118 | #' when \eqn{npulls.family = "Log-Normal"}; 119 | #' \code{lambda} is a positive parameter 120 | #' specifying the mean when \eqn{npulls.family = "Poisson"}. 
121 | #' 122 | #' \code{nonstationary.family}: 123 | #' A list of parameters of \code{nonstationary.type}: 124 | #' \code{sd} is a positive parameter specifying the standard deviation 125 | #' of white noise 126 | #' when \eqn{nonstationary.type = "Random Walk"}; \code{sdlog} is 127 | #' a positive parameter specifying the log standard deviation of white noise 128 | #' when \eqn{nonstationary.type = "Geometric Random Walk"}. 129 | #' @param regret.plot A logic value indicating whether an average regret plot 130 | #' is returned. Default to be FALSE. 131 | #' @return a list consisting of: 132 | #' \item{regret.matrix}{A three-dimensional array with each dimension corresponding to the period, iteration and method.} 133 | #' \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 134 | #' @export 135 | #' @examples 136 | #' ### Compare Epsilon-Greedy and Thompson Sampling in the stationary case. 137 | #' set.seed(100) 138 | #' res <- SimulateMultipleMethods( 139 | #' method = c("Epsilon-Greedy", "Thompson-Sampling"), 140 | #' method.par = list(epsilon = 0.1, ndraws.TS = 1000), 141 | #' iter = 100, 142 | #' nburnin = 30, 143 | #' nperiod = 180, 144 | #' reward.mean.family = "Uniform", 145 | #' reward.family = "Bernoulli", 146 | #' narms.family = "Poisson", 147 | #' npulls.family = "Log-Normal", 148 | #' data.par = list(reward.mean = list(min = 0, max = 0.1), 149 | #' npulls.family = list(meanlog = 3, sdlog = 1.5), 150 | #' narms.family = list(lambda = 5)), 151 | #' regret.plot = TRUE) 152 | #' res$regret.plot.object 153 | #' ### Compare Epsilon-Greedy, Thompson Sampling and EXP3 in the non-stationary case. 
#' set.seed(100)
#' res <- SimulateMultipleMethods(
#'   method = c("Epsilon-Greedy", "Thompson-Sampling", "EXP3"),
#'   method.par = list(epsilon = 0.1,
#'                     ndraws.TS = 1000,
#'                     EXP3 = list(gamma = 0, eta = 0.1)),
#'   iter = 100,
#'   nburnin = 30,
#'   nperiod = 90,
#'   reward.mean.family = "Beta",
#'   reward.family = "Bernoulli",
#'   narms.family = "Binomial",
#'   npulls.family = "Log-Normal",
#'   stationary = FALSE,
#'   nonstationary.type = "Geometric Random Walk",
#'   data.par = list(reward.mean = list(shape1 = 2, shape2 = 5),
#'                   npulls.family = list(meanlog = 3, sdlog = 1),
#'                   narms.family = list(size = 10, prob = 0.5),
#'                   nonstationary.family = list(sdlog = 0.05)),
#'   regret.plot = TRUE)
#' res$regret.plot.object

SimulateMultipleMethods <- function(method = "Thompson-Sampling",
                                    method.par = list(ndraws.TS = 1000),
                                    iter,
                                    nburnin,
                                    nperiod,
                                    reward.mean.family,
                                    reward.family,
                                    narms.family,
                                    npulls.family,
                                    stationary = TRUE,
                                    nonstationary.type = NULL,
                                    data.par,
                                    regret.plot = FALSE){
  # --- Validate user-supplied names early so failures are informative. ---
  if (! all(method %in% c("Epsilon-Greedy", "Epsilon-Decreasing",
                          "Thompson-Sampling","EXP3", "UCB", "Bayes-Poisson-TS",
                          "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling",
                          "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS",
                          "HyperTS"))){
    stop("Please specify correct method names!")
  }
  if (! reward.family %in% c("Bernoulli", "Poisson", "Gaussian")){
    stop("Please specify correct reward family!")
  }
  if (! reward.mean.family %in% c("Uniform", "Beta", "Gaussian")){
    stop("Please specify correct mean reward family!")
  }
  if (! narms.family %in% c("Binomial", "Poisson")){
    stop("Please specify correct distribution family for the number of arms!")
  }
  if (! npulls.family %in% c("Log-Normal", "Poisson")){
    stop("Please specify correct distribution family for the number of pulls!")
  }
  # Robustness fix: previously a non-stationary run with a NULL or unknown
  # nonstationary.type crashed later with an uninformative
  # "argument is of length zero" error inside `if`.
  if (! stationary &&
      (is.null(nonstationary.type) ||
       ! nonstationary.type %in% c("Random Walk", "Geometric Random Walk"))){
    stop("Please specify correct nonstationary type!")
  }

  nmethod <- length(method)
  ntotal <- nburnin + nperiod  # total number of simulated periods
  # Dimensions: period x iteration x method.
  regret.matrix <- array(0, c(nperiod, iter, nmethod))
  for (i in seq_len(iter)){
    # --- Draw the number of arms; rejection-sample until at least 2 arms. ---
    if (narms.family == "Poisson"){
      lambda <- data.par$narms.family$lambda
      repeat {
        narms <- rpois(1, lambda)
        if (narms > 1) break
      }
    }
    if (narms.family == "Binomial"){
      size <- data.par$narms.family$size
      prob <- data.par$narms.family$prob
      repeat {
        narms <- rbinom(1, size, prob)
        if (narms > 1) break
      }
    }

    # --- Draw each arm's mean reward from the requested family. ---
    if (reward.mean.family == "Uniform"){
      mean.reward <- runif(narms, min = data.par$reward.mean$min,
                           max = data.par$reward.mean$max)
    }
    if (reward.mean.family == "Beta"){
      mean.reward <- rbeta(narms, shape1 = data.par$reward.mean$shape1,
                           shape2 = data.par$reward.mean$shape2)
    }
    if (reward.mean.family == "Gaussian"){
      if (reward.family == "Bernoulli" || reward.family == "Poisson"){
        stop("Please not use Gaussian if reward family is Bernoulli or Poisson!")
      }
      mean.reward <- rnorm(narms, mean = data.par$reward.mean$mean,
                           sd = data.par$reward.mean$sd)
    }

    # --- Non-stationary case: expand the scalar means into a
    # period-by-arm matrix following the requested random-walk dynamics. ---
    if (! stationary){
      mean.reward.matrix <- matrix(0, nrow = ntotal, ncol = narms)
      mean.reward.matrix[1, ] <- mean.reward
      if (nonstationary.type == "Geometric Random Walk"){
        for (j in 2:ntotal){
          step <- mean.reward.matrix[j - 1, ] *
            rlnorm(narms, meanlog = 0,
                   sdlog = data.par$nonstationary.family$sdlog)
          if (reward.family == "Bernoulli"){
            # Bernoulli means must stay within [0, 1]; pmin() is the
            # vectorized equivalent of the old sapply(min) loop.
            step <- pmin(1, step)
          }
          mean.reward.matrix[j, ] <- step
        }
        mean.reward <- mean.reward.matrix
      }
      if (nonstationary.type == "Random Walk"){
        if (reward.family == "Bernoulli" || reward.family == "Poisson"){
          stop("Please not use Random Walk to generate mean reward if reward family is Bernoulli or Poisson!")
        }
        for (j in 2:ntotal){
          mean.reward.matrix[j, ] <- mean.reward.matrix[j - 1, ] +
            rnorm(narms, mean = 0, sd = data.par$nonstationary.family$sd)
        }
        mean.reward <- mean.reward.matrix
      }
    }

    # --- Draw the (strictly positive) number of pulls for every period. ---
    if (npulls.family == "Log-Normal"){
      npulls.per.period <- ceiling(
        rlnorm(ntotal,
               meanlog = data.par$npulls.family$meanlog,
               sdlog = data.par$npulls.family$sdlog))
    }
    if (npulls.family == "Poisson"){
      npulls.per.period <- rep(0, ntotal)
      for (num in seq_len(ntotal)){
        repeat {
          npulls.per.period[num] <-
            rpois(1, lambda = data.par$npulls.family$lambda)
          if (npulls.per.period[num] > 0) break
        }
      }
    }

    # --- Run every requested strategy on the same simulated data so the
    # methods are compared on identical draws. ---
    for (k in seq_along(method)){
      regret <- SimulateMultiplePeriods(method = method[k],
                                        nburnin = nburnin,
                                        nperiod = nperiod,
                                        mean.reward = mean.reward,
                                        reward.family = reward.family,
                                        sd.reward = data.par$reward.family$sd,
                                        npulls.per.period = npulls.per.period,
                                        method.par = method.par)$regret
      regret.matrix[, i, k] <- regret
    }
  }

  if (isTRUE(regret.plot)){
    # Average over iterations, then reshape to long form for ggplot.
    relativeRegret <- c(apply(regret.matrix, c(1, 3), mean))
    methodName <- rep(method, each = nperiod)
    daySeq <- rep(seq_len(nperiod), nmethod)
    graphData <- data.frame(daySeq, relativeRegret, methodName)
    regret.plot.object <- ggplot(graphData,
                                 aes(x = daySeq,
                                     y = relativeRegret,
                                     colour = factor(methodName))) +
      geom_line() + ggtitle("Regret Plot") +
      theme(legend.text = element_text(size = 12, face = "bold"),
            axis.text.y = element_text(size = 12),
            legend.title = element_blank()) +
      labs(y = "Relative Regret", x = "day")
    return(list(regret.matrix = regret.matrix,
                regret.plot.object = regret.plot.object))
  } else {
    return(list(regret.matrix = regret.matrix))
  }
}
332 | -------------------------------------------------------------------------------- /R/SimulateMultipleMethods.R~: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Compare various strategies for Multi-Armed Bandit in stationary 15 | #' and non-stationary scenarios 16 | #' 17 | #' This function is aimed to simulate data in different scenarios to 18 | #' compare various strategies in Multi-Armed Bandit. 19 | #' Users can specify the distribution of the number of arms, 20 | #' the distribution of mean reward, the distribution of the number of pulls 21 | #' in one period and the stationariness to simulate different scenarios. 22 | #' Relative regret is returned and average relative regret plot is returned if needed. 23 | #' See \code{\link{SimulateMultiplePeriods}} for more details.
24 | #' @param method A vector of character strings choosing from "Epsilon-Greedy", 25 | #' "Epsilon-Decreasing", "Thompson-Sampling", 26 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 27 | #' "EXP3-Thompson-Sampling", 28 | #' "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 29 | #' See \code{\link{SimulateMultiplePeriods}} for more details. 30 | #' Default is "Thompson-Sampling". 31 | #' @param method.par A list of parameters needed for different methods: 32 | #' 33 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 34 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and "Greedy-Bayes-Poisson-TS". 35 | #' 36 | #' \code{ndraws.TS}: A positive integer specifying 37 | #' the number of random draws from the posterior; 38 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" 39 | #' and "EXP3-Thompson-Sampling". Default is 1000. 40 | #' 41 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and \code{gamma}; 42 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 43 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 44 | #' 45 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 46 | #' \code{ndraws.BP} and \code{interval.BP}; 47 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" 48 | #' and "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 49 | #' to compute posterior; 50 | #' \code{ndraws.BP} specifies the number of posterior samples drawn 51 | #' from posterior distribution; \code{interval.BP} is specified to 52 | #' draw each posterior sample from 53 | #' a sample sequence of length \code{interval.BP}. 54 | #' 55 | #' \code{HyperTS}: A list consisting of a vector \code{method.list}, 56 | #' needed for "HyperTS". 
\code{method.list} is a vector of character strings 57 | #' choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 58 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 59 | #' "EXP3-Thompson-Sampling", 60 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS". 61 | #' "HyperTS" will construct an ensemble consisting all the methods 62 | #' in \code{method.list}. 63 | #' @param iter A positive integer specifying the number of iterations. 64 | #' @param nburnin A positive integer specifying the number of periods 65 | #' to allocate each arm equal traffic before applying any strategy. 66 | #' @param nperiod A positive integer specifying the number of periods 67 | #' to apply various strategies. 68 | #' 69 | #' @param reward.mean.family A character string specifying 70 | #' the distribution family to generate mean reward of each arm. 71 | #' Available distribution includes "Uniform", "Beta" and "Gaussian". 72 | #' @param reward.family A character string specifying the distribution family 73 | #' of reward. Available distribution includes 74 | #' "Bernoulli", "Poisson" and "Gaussian". 75 | #' If "Gaussian" is chosen to be the reward distribution, 76 | #' a vector of standard deviation should be provided in 77 | #' \code{sd.reward} in \code{data.par}. 78 | #' @param narms.family A character string specifying the distribution family 79 | #' of the number of arms. Available distribution includes "Poisson" and "Binomial". 80 | #' @param npulls.family A character string specifying the distribution family 81 | #' of the number of pulls per period. 82 | #' For continuous distribution, the number of pulls will be rounded up. 83 | #' Available distribution includes "Log-Normal" and "Poisson". 84 | #' @param stationary A logic value indicating whether a stationary 85 | #' Multi-Armed Bandit is considered (corresponding to the case that 86 | #' the reward mean is unchanged). Default to be TRUE. 
87 | #' @param nonstationary.type A character string indicating 88 | #' how the mean reward varies. Available types include "Random Walk" and 89 | #' "Geometric Random Walk" 90 | #' (reward mean follows random walk in the log scale). Default to be NULL. 91 | #' @param data.par A list of data generating parameters: 92 | #' 93 | #' \code{reward.mean}: A list of parameters of \code{reward.mean.family}: 94 | #' \code{min} and \code{max} are two real numbers specifying 95 | #' the bounds when \eqn{reward.mean.family = "Uniform"}; \code{shape1} and 96 | #' \code{shape2} are two shape parameters when \eqn{reward.mean.family = "Beta"}; 97 | #' \code{mean} and \code{sd} specify mean and standard deviation 98 | #' when \eqn{reward.mean.family = "Gaussian"}. 99 | #' 100 | #' \code{reward.family}: A list of parameters of \code{reward.family}: 101 | #' \code{sd} is a vector of non-negative numbers specifying standard deviation 102 | #' of each arm's reward distribution 103 | #' if "Gaussian" is chosen to be the reward distribution. 104 | #' 105 | #' \code{narms.family}: A list of parameters of \code{narms.family}: 106 | #' \code{lambda} is a positive parameter specifying the mean when 107 | #' \eqn{narms.family = "Poisson"}; \code{size} and \code{prob} 108 | #' are 2 parameters needed when \eqn{narms.family = "Binomial"}. 109 | #' 110 | #' \code{npulls.family}: A list of parameters of \code{npulls.family}: 111 | #' \code{meanlog} and \code{sdlog} are 2 positive parameters specifying the mean 112 | #' and standard deviation in the log scale 113 | #' when \eqn{npulls.family = "Log-Normal"}; \code{lambda} is a positive parameter 114 | #' specifying the mean when \eqn{npulls.family = "Poisson"}. 
115 | #' 116 | #' 117 | #' \code{nonstationary.family}: A list of parameters of \code{nonstationary.type}: 118 | #' \code{sd} is a positive parameter specifying the standard deviation 119 | #' of white noise 120 | #' when \eqn{nonstationary.type = "Random Walk"}; \code{sdlog} is 121 | #' a positive parameter specifying the log standard deviation of white noise 122 | #' when \eqn{nonstationary.type = "Geometric Random Walk"}. 123 | #' @param regret.plot A logic value indicating whether an average regret plot 124 | #' is returned. Default to be FALSE. 125 | #' @return a list consisting of: 126 | #' \item{regret.matrix}{A three-dimensional array with each dimension corresponding to the period, iteration and method.} 127 | #' \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 128 | #' @export 129 | #' @examples 130 | #' ### Compare Epsilon-Greedy and Thompson Sampling in the stationary case. 131 | #' set.seed(100) 132 | #' res <- SimulateMultipleMethods(method = c("Epsilon-Greedy", "Thompson-Sampling"), 133 | #' method.par = list(epsilon = 0.1, ndraws.TS = 1000), 134 | #' iter = 100, 135 | #' nburnin = 30, 136 | #' nperiod = 180, 137 | #' reward.mean.family = "Uniform", 138 | #' reward.family = "Bernoulli", 139 | #' narms.family = "Poisson", 140 | #' npulls.family = "Log-Normal", 141 | #' data.par = list(reward.mean = list(min = 0, max = 0.1), 142 | #' npulls.family = list(meanlog = 3, sdlog = 1.5), 143 | #' narms.family = list(lambda = 5)), 144 | #' regret.plot = TRUE) 145 | #' res$regret.plot.object 146 | #' ### Compare Epsilon-Greedy, Thompson Sampling and EXP3 in the non-stationary case. 
147 | #' set.seed(100) 148 | #' res <- SimulateMultipleMethods(method = c("Epsilon-Greedy", "Thompson-Sampling", "EXP3"), 149 | #' method.par = list(epsilon = 0.1, 150 | #' ndraws.TS = 1000, 151 | #' EXP3 = list(gamma = 0, eta = 0.1)), 152 | #' iter = 100, 153 | #' nburnin = 30, 154 | #' nperiod = 90, 155 | #' reward.mean.family = "Beta", 156 | #' reward.family = "Bernoulli", 157 | #' narms.family = "Binomial", 158 | #' npulls.family = "Log-Normal", 159 | #' stationary = FALSE, 160 | #' nonstationary.type = "Geometric Random Walk", 161 | #' data.par = list(reward.mean = list(shape1 = 2, shape2 = 5), 162 | #' npulls.family = list(meanlog = 3, sdlog = 1), 163 | #' narms.family = list(size = 10, prob = 0.5), 164 | #' nonstationary.family = list(sdlog = 0.05)), 165 | #' regret.plot = TRUE) 166 | #' res$regret.plot.object 167 | 168 | 169 | 170 | SimulateMultipleMethods <- function(method = "Thompson-Sampling", 171 | method.par = list(ndraws.TS = 1000), 172 | iter, 173 | nburnin, 174 | nperiod, 175 | reward.mean.family, 176 | reward.family, 177 | narms.family, 178 | npulls.family, 179 | stationary = TRUE, 180 | nonstationary.type = NULL, 181 | data.par, 182 | regret.plot = FALSE){ 183 | if (! all(method %in% c("Epsilon-Greedy", "Epsilon-Decreasing", 184 | "Thompson-Sampling","EXP3", "UCB", "Bayes-Poisson-TS", 185 | "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling", 186 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS", 187 | "HyperTS"))){ 188 | stop("Please specify correct method names!") 189 | } 190 | if (! reward.family %in% c("Bernoulli", "Poisson", "Gaussian")){ 191 | stop("Please specify correct reward family!") 192 | } 193 | if (! reward.mean.family %in% c("Uniform", "Beta", "Gaussian")){ 194 | stop("Please specify correct mean reward family!") 195 | } 196 | if (! narms.family %in% c("Binomial", "Poisson")){ 197 | stop("Please specify correct distribution family for the number of arms!") 198 | } 199 | if (! 
npulls.family %in% c("Log-Normal", "Poisson")){ 200 | stop("Please specify correct distribution family for the number of pulls!") 201 | } 202 | 203 | 204 | nmethod <- length(method) 205 | regret.matrix <- array(0, c(nperiod, iter, nmethod)) 206 | for (i in 1:iter){ 207 | if (narms.family == "Poisson"){ 208 | lambda <- data.par$narms.family$lambda 209 | while(TRUE){ 210 | narms <- rpois(1, lambda) 211 | if (narms > 1){ 212 | break 213 | } 214 | } 215 | } 216 | if (narms.family == "Binomial"){ 217 | size <- data.par$narms.family$size 218 | prob <- data.par$narms.family$prob 219 | while(TRUE){ 220 | narms <- rbinom(1, size, prob) 221 | if (narms > 1){ 222 | break 223 | } 224 | } 225 | } 226 | 227 | 228 | if (reward.mean.family == "Uniform"){ 229 | mean.reward <- runif(narms, min = data.par$reward.mean$min, 230 | max = data.par$reward.mean$max) 231 | } 232 | if (reward.mean.family == "Beta"){ 233 | mean.reward <- rbeta(narms, shape1 = data.par$reward.mean$shape1, 234 | shape2 = data.par$reward.mean$shape2) 235 | } 236 | if (reward.mean.family == "Gaussian"){ 237 | if (reward.family == "Bernoulli" | reward.family == "Poisson"){ 238 | stop("Please not use Gaussian if reward family is Bernoulli or Poisson!") 239 | } 240 | mean.reward <- rnorm(narms, mean = data.par$reward.mean$mean, 241 | sd = data.par$reward.mean$sd) 242 | } 243 | 244 | 245 | if (stationary == FALSE){ 246 | mean.reward.matrix <- matrix(0, nrow = nburnin + nperiod, ncol = narms) 247 | mean.reward.matrix[1, ] <- mean.reward 248 | if (nonstationary.type == "Geometric Random Walk"){ 249 | for (j in 2:(nburnin + nperiod)){ 250 | if (reward.family == "Bernoulli"){ 251 | mean.reward.matrix[j, ] <- 252 | sapply(mean.reward.matrix[j - 1, ] * 253 | rlnorm(narms, meanlog = 0, 254 | sdlog = data.par$nonstationary.family$sdlog), 255 | function(x) min(1, x)) 256 | }else{ 257 | mean.reward.matrix[j, ] <- 258 | mean.reward.matrix[j - 1, ] * 259 | rlnorm(narms, meanlog = 0, 260 | sdlog = 
data.par$nonstationary.family$sdlog) 261 | } 262 | } 263 | mean.reward <- mean.reward.matrix 264 | } 265 | if (nonstationary.type == "Random Walk"){ 266 | if (reward.family == "Bernoulli" | reward.family == "Poisson"){ 267 | stop("Please not use Random Walk to generate mean reward if reward family is Bernoulli or Poisson!") 268 | } 269 | for (j in 2:(nburnin + nperiod)){ 270 | mean.reward.matrix[j, ] <- mean.reward.matrix[j - 1, ] + 271 | rnorm(narms, mean = 0, sd = data.par$nonstationary.family$sd) 272 | } 273 | mean.reward <- mean.reward.matrix 274 | } 275 | } 276 | 277 | 278 | if (npulls.family == "Log-Normal"){ 279 | npulls.per.period <- ceiling(rlnorm(nburnin + nperiod, 280 | meanlog = data.par$npulls.family$meanlog, 281 | sdlog = data.par$npulls.family$sdlog)) 282 | } 283 | if (npulls.family == "Poisson"){ 284 | npulls.per.period <- rep(0, nburnin + nperiod) 285 | for (num in 1:(nburnin + nperiod)){ 286 | while(TRUE){ 287 | npulls.per.period[num] <- rpois(1, lambda = data.par$npulls.family$lambda) 288 | if (npulls.per.period[num] > 0){ 289 | break 290 | } 291 | } 292 | } 293 | } 294 | 295 | for (k in 1:length(method)){ 296 | regret <- SimulateMultiplePeriods(method = method[k], 297 | nburnin = nburnin, 298 | nperiod = nperiod, 299 | mean.reward = mean.reward, 300 | reward.family = reward.family, 301 | sd.reward = data.par$reward.family$sd, 302 | npulls.per.period = npulls.per.period, 303 | method.par = method.par)$regret 304 | regret.matrix[, i, k] <- regret 305 | } 306 | } 307 | 308 | if (regret.plot == TRUE){ 309 | relativeRegret <- c(apply(regret.matrix, c(1,3), mean)) 310 | methodName <- rep(method, each = nperiod) 311 | daySeq <- rep(1:nperiod, nmethod) 312 | graphData <- data.frame(daySeq, relativeRegret, methodName) 313 | regret.plot.object <- ggplot(graphData, 314 | aes(x = daySeq, 315 | y = relativeRegret, 316 | colour = factor(methodName))) + 317 | geom_line() + ggtitle("Regret Plot") + 318 | theme(legend.text = element_text(size = 12, face = 
"bold"), 319 | axis.text.y = element_text(size = 12), 320 | legend.title = element_blank()) + 321 | labs(y = "Relative Regret", x = "day") 322 | return(list(regret.matrix = regret.matrix, 323 | regret.plot.object = regret.plot.object)) 324 | }else{ 325 | return(list(regret.matrix = regret.matrix)) 326 | } 327 | } 328 | 329 | 330 | -------------------------------------------------------------------------------- /R/SimulateMultiplePeriods.R: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Simulate strategies for Multi-Armed Bandit in multiple periods 15 | #' 16 | #' This function is aimed to simulate data to run 17 | #' strategies of Multi-Armed Bandit in a sequence of periods. Weight plot 18 | #' and regret plot are provided if needed. In each period there could be 19 | #' multiple pulls and each method can only be applied once. The default setting 20 | #' is that in each period there is only 1 pull, corresponding to continuous 21 | #' updating. 22 | #' 23 | #' Various methods have been implemented. "Epsilon-Greedy" and 24 | #' "Epsilon-Decreasing" allocates \eqn{1 - epsilon} traffic to the arm which has 25 | #' the largest average reward and equally distribute the traffic 26 | #' to other arms. For "Epsilon-Greedy" epsilon in \code{method.par} serves as 27 | #' constant exploration rate . 
For "Epsilon-Decreasing" epsilon in 28 | #' \code{method.par} serves as exploration rate at period 1, 29 | #' while in period \eqn{t} exploration rate is \eqn{epsilon / t}. 30 | #' See \url{https://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions} 31 | #' for more details about these strategies. 32 | #' 33 | #' "Thompson-Sampling" refers to Beta-Binomial Thompson Sampling using 34 | #' Beta(1, 1) as a prior. "Bayes-Poisson-TS" refers to Poisson-Gamma Thompson 35 | #' Sampling using a Bayesian Generalized Linear 36 | #' Mixed Effects Model to compute weights. "Bayes-Poisson-TS", 37 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS" depends on the package 38 | #' "emre" to compute posterior distribution. For algorithm 39 | #' details, see the paper \url{https://arxiv.org/abs/1602.00047}. 40 | #' 41 | #' UCB (Upper Confidence Bound) is a classical method for Multi-Armed Bandit. 42 | #' For algorithm details, see the paper 43 | #' \url{http://personal.unileoben.ac.at/rortner/Pubs/UCBRev.pdf}. 44 | #' EXP3 is a method which needs to specify exploration rate \code{gamma} and 45 | #' exploitation rate \code{eta}. For algorithm details, see the paper 46 | #' \url{https://cseweb.ucsd.edu/~yfreund/papers/bandits.pdf}. 47 | #' 48 | #' Ensemble methods are also implemented. "Greedy-Thompson-Sampling" and 49 | #' "Greedy-Bayes-Poisson-TS" allocate \eqn{1 - epsilon} traffic to the arm 50 | #' corresponding to the largest 51 | #' Thompson sampling weight and allocate \eqn{epsilon} traffic 52 | #' corresponding to Thompson sampling weights. 53 | #' Instead of using average reward for each period to update weights in "EXP3", 54 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS" use Thompson sampling 55 | #' weights in the updating formula in "EXP3". 56 | #' "HyperTS" is an ensemble by applying Thompson Sampling to selecting the best 57 | #' method in each period based on previous performance. 
For algorithm details, 58 | #' see the paper 59 | #' \url{http://yxjiang.github.io/paper/RecSys2014-ensemble-bandit.pdf}. 60 | #' 61 | #' To measure the performance, regret is computed by summing over the products 62 | #' of the number of pulls on one arm at one period and 63 | #' the difference of the mean reward of that arm compared with the largest one. 64 | #' Relative regret is 65 | #' computed by dividing the regret of a certain method by the regret of the 66 | #' benchmark method that allocates equal weights to each arm 67 | #' throughout all the periods. 68 | #' 69 | #' 70 | #' @param method A character string choosing from "Epsilon-Greedy", 71 | #' "Epsilon-Decreasing", "Thompson-Sampling", 72 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 73 | #' "EXP3-Thompson-Sampling", 74 | #' "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 75 | #' For details of these methods, see below. Default is "Thompson-Sampling". 76 | #' @param method.par A list of parameters needed for different methods: 77 | #' 78 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 79 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and 80 | #' "Greedy-Bayes-Poisson-TS". 81 | #' 82 | #' \code{ndraws.TS}: A positive integer specifying the number of random draws 83 | #' from the posterior; 84 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 85 | #' "EXP3-Thompson-Sampling". Default is 1000. 
91 | #' 92 | #' \code{BP}: A list consisting of three positive integers \code{iter.BP}, 93 | #' \code{ndraws.BP} and \code{interval.BP}; 94 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 95 | #' "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 96 | #' to compute posterior; 97 | #' \code{ndraws.BP} specifies the number of posterior samples 98 | #' drawn from posterior distribution; \code{interval.BP} is specified to draw 99 | #' each posterior sample from a sample sequence of length \code{interval.BP}. 100 | #' 101 | #' \code{HyperTS}: A list consisting of a vector \code{method.list}, 102 | #' needed for "HyperTS". \code{method.list} is a vector of character strings 103 | #' choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 104 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 105 | #' "EXP3-Thompson-Sampling", "Greedy-Bayes-Poisson-TS" and 106 | #' "EXP3-Bayes-Poisson-TS". "HyperTS" will construct an ensemble consisting of all 107 | #' the methods in \code{method.list}. 108 | #' @param nburnin A positive integer specifying the number of periods to 109 | #' allocate each arm equal traffic before applying any strategy. 110 | #' @param nperiod A positive integer specifying the number of periods 111 | #' to apply the strategy. 112 | #' @param npulls.per.period A positive integer or a vector of positive 113 | #' integers. Default value is 1. If \code{npulls.per.period} is a positive 114 | #' integer, the number of pulls is \code{npulls.per.period} for each period. 115 | #' If \code{npulls.per.period} is a vector, each element represents 116 | #' the number of pulls for one period; the length of \code{npulls.per.period} 117 | #' should be equal to \code{nburnin} + \code{nperiod}. 118 | #' @param reward.family A character string specifying the distribution family 119 | #' of reward. Available distributions include 120 | #' "Bernoulli", "Poisson" and "Gaussian". 
If "Gaussian" is chosen to be the 121 | #' reward distribution, 122 | #' a vector of standard deviation should be provided in \code{sd.reward}. 123 | #' @param sd.reward A vector of non-negative numbers specifying 124 | #' standard deviation of each arm's reward distribution if "Gaussian" is chosen 125 | #' to be the reward distribution. Default to be NULL. 126 | #' See \code{reward.family}. 127 | #' @param mean.reward A vector or a matrix of real numbers specifying the mean 128 | #' reward of each arm. If \code{mean.reward} is a vector, each element is the 129 | #' mean reward for each arm and the mean reward of each arm is unchanged 130 | #' throughout all periods (corresponding to the stationary Multi-Armed Bandit). 131 | #' If \code{mean.reward} is a matrix, it should 132 | #' have (\code{nburnin} + \code{nperiod}) rows. The mean reward of each arm 133 | #' could change. Each row represents a mean reward vector for each period 134 | #' (corresponding to nonstationary and adversarial Multi-Armed Bandit). 135 | #' @param weight.plot A logic value with FALSE as default. If TRUE, weight plot 136 | #' object for each arm is returned. 137 | #' @param regret.plot A logic value with FALSE as default. If TRUE, relative 138 | #' regret plot object is returned. 139 | #' 140 | #' @return a list consisting of: 141 | #' \item{weight}{A weight matrix whose each element is the allocated weight 142 | #' for each arm and period. Each row represents one arm and each column 143 | #' represents one period.} 144 | #' \item{regret}{A relative regret vector whose each element is relative regret 145 | #' for each period. 
For definition of relative regret, see above.}
#' \item{weight.plot.object}{If weight.plot = TRUE, a ggplot object is returned.}
#' \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.}
#' @export
#' @examples
#' ### Simulate Thompson-Sampling
#' set.seed(100)
#' res <- SimulateMultiplePeriods(method = "Thompson-Sampling",
#'                                method.par = list(ndraws.TS = 1000),
#'                                nburnin = 30,
#'                                nperiod = 180,
#'                                npulls.per.period = 5,
#'                                reward.family = "Bernoulli",
#'                                mean.reward = runif(3, 0, 0.1),
#'                                weight.plot = TRUE)
#' res$weight.plot.object
#' ### Simulate EXP3-Thompson-Sampling
#' set.seed(100)
#' res <- SimulateMultiplePeriods(
#'     method = "EXP3-Thompson-Sampling",
#'     method.par = list(ndraws.TS = 1000,
#'                       EXP3 = list(gamma = 0, eta = 0.1)),
#'     nburnin = 30,
#'     nperiod = 180,
#'     npulls.per.period = 5,
#'     reward.family = "Bernoulli",
#'     mean.reward = runif(3, 0, 0.1),
#'     weight.plot = TRUE)
#' res$weight.plot.object
#' ### Simulate ensemble method HyperTS given "Thompson-Sampling", "Epsilon-Greedy" and "Epsilon-Decreasing"
#' set.seed(100)
#' res <- SimulateMultiplePeriods(
#'     method = "HyperTS",
#'     method.par = list(
#'         ndraws.TS = 1000,
#'         epsilon = 0.1,
#'         HyperTS = list(method.list = c("Thompson-Sampling",
#'                                        "Epsilon-Greedy",
#'                                        "Epsilon-Decreasing"))),
#'     nburnin = 30,
#'     nperiod = 180,
#'     npulls.per.period = 5,
#'     reward.family = "Poisson",
#'     mean.reward = runif(3, 0, 0.1),
#'     weight.plot = TRUE)
#' res$weight.plot.object

SimulateMultiplePeriods <- function(method = "Thompson-Sampling",
                                    method.par = list(ndraws.TS = 1000),
                                    nburnin,
                                    nperiod,
                                    reward.family,
                                    mean.reward,
                                    sd.reward = NULL,
                                    npulls.per.period = 1,
                                    weight.plot = FALSE,
                                    regret.plot = FALSE){
  # ---- Input validation -----------------------------------------------------
  supported.methods <- c("Epsilon-Greedy", "Epsilon-Decreasing",
                         "Thompson-Sampling", "EXP3", "UCB", "Bayes-Poisson-TS",
                         "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling",
                         "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS",
                         "HyperTS")
  if (! method %in% supported.methods){
    stop("Please specify correct method names!")
  }

  # A vector mean.reward is the stationary case (one mean per arm, fixed over
  # time); a matrix has one row of per-arm means per period (nonstationary).
  if (is.vector(mean.reward)){
    rewardVec <- mean.reward
    bestReward <- max(rewardVec)               # best arm's mean (scalar)
    n <- length(rewardVec)                     # number of arms
  }else{
    bestReward <- apply(mean.reward, 1, max)   # best mean per period
    n <- dim(mean.reward)[2]
  }

  # Scalar conditions: use short-circuit `||` (was vectorized `|`).
  if (length(npulls.per.period) == 1){
    npullsVec <- npulls.per.period
  }else if (length(npulls.per.period) != nburnin + nperiod ||
            min(npulls.per.period) <= 0){
    stop("Please specify correct number of pulls per period!")
  }

  if (reward.family == "Gaussian" &&
      (length(sd.reward) != n || anyNA(sd.reward))){
    stop("Please specify correct standard deviation for Gaussian reward family!")
  }

  # ---- Burn-in: spread pulls uniformly over the arms ------------------------
  if (is.vector(mean.reward)){
    # Stationary means: all burn-in pulls can be drawn in one multinomial.
    total.burnin.pulls <- if (length(npulls.per.period) == 1){
      nburnin * npullsVec
    }else{
      sum(npulls.per.period[1:nburnin])
    }
    burninTrial <- rmultinom(1, total.burnin.pulls, rep(1 / n, n))
    burninReward <- apply(cbind(burninTrial, rewardVec, sd.reward), 1,
                          GetReward, reward.family)
  }else{
    # Nonstationary means: draw period by period because the means change.
    burninTrial <- rep(0, n)
    burninReward <- rep(0, n)
    for (period in seq_len(nburnin)){
      period.pulls <- if (length(npulls.per.period) == 1){
        npullsVec
      }else{
        npulls.per.period[period]
      }
      tempTrial <- rmultinom(1, period.pulls, rep(1 / n, n))
      tempReward <- apply(cbind(tempTrial, mean.reward[period, ], sd.reward),
                          1, GetReward, reward.family)
      burninTrial <- burninTrial + tempTrial
      burninReward <- burninReward + tempReward
    }
  }
  burninEvent <- data.frame(trial = burninTrial, reward = burninReward)

  # ---- Benchmark: regret of the equal-allocation strategy -------------------
  equalDailyRegret <- rep(0, nperiod)
  for (period in seq_len(nperiod)){
    if (length(npulls.per.period) == 1){
      dailyTrial <- rep(npullsVec / n, n)
    }else{
      dailyTrial <- rep(npulls.per.period[nburnin + period] / n, n)
    }
    if (is.vector(mean.reward)){
      equalDailyRegret[period] <- sum(dailyTrial * (bestReward - rewardVec))
    }else{
      equalDailyRegret[period] <-
        sum(dailyTrial * (bestReward[nburnin + period] -
                          mean.reward[nburnin + period, ]))
    }
  }

  # ---- Main simulation loop -------------------------------------------------
  # Preallocate the weight matrix (one column per period); previously grown
  # with cbind() inside the loop, which copies the matrix every iteration.
  allWeight <- matrix(NA_real_, nrow = n, ncol = nperiod)
  all.event <- burninEvent
  dailyRegret <- rep(0, nperiod)
  # Seed the EXP3 bookkeeping with a Thompson-Sampling weight on the burn-in.
  weight <- as.vector(CalculateWeight(method = "Thompson-Sampling",
                                      sd.reward = sd.reward,
                                      reward.family = reward.family,
                                      all.event = all.event,
                                      method.par = list(ndraws.TS = 1000)))
  EXP3Info <- list(prevWeight = weight,
                   EXP3Trial = burninTrial,
                   EXP3Reward = burninReward)

  if (method == "HyperTS"){
    nmethod <- length(method.par$HyperTS$method.list)
    total.reward <- rep(0, nmethod)
    if (reward.family == "Bernoulli"){
      total.trial <- rep(0, nmethod)
    }
    # Gaussian/Poisson posteriors divide by total.trial, so start at 1 to
    # avoid division by zero on the first period.
    if (reward.family == "Gaussian" || reward.family == "Poisson"){
      total.trial <- rep(1, nmethod)
    }
  }

  for (period in seq_len(nperiod)){
    if (method == "HyperTS"){
      # Thompson sampling over the base methods: draw from each method's
      # posterior and pick the method that wins the most posterior draws.
      ndraws <- 1000
      ans <- matrix(nrow = ndraws, ncol = nmethod)
      if (reward.family == "Bernoulli"){
        for (i in seq_len(nmethod)){
          ans[, i] <- rbeta(ndraws, total.reward[i] + 1,
                            total.trial[i] - total.reward[i] + 1)
        }
      }
      if (reward.family == "Gaussian"){
        for (i in seq_len(nmethod)){
          # NOTE(review): sd.reward is a per-arm vector but is indexed by the
          # method index i here -- confirm this is the intended behavior.
          ans[, i] <- rnorm(ndraws, mean = total.reward[i] / total.trial[i],
                            sd = sd.reward[i] / sqrt(total.trial[i]))
        }
      }
      if (reward.family == "Poisson"){
        for (i in seq_len(nmethod)){
          ans[, i] <- rgamma(ndraws, shape = total.reward[i] + 1,
                             scale = 1 / total.trial[i])
        }
      }
      method.index <- which.max(as.vector(table(factor(max.col(ans),
                                                       levels = 1:nmethod))))
      method.chosen <- method.par$HyperTS$method.list[method.index]
      weight <- CalculateWeight(method.chosen,
                                all.event = all.event,
                                sd.reward = sd.reward,
                                reward.family = reward.family,
                                method.par = method.par,
                                period = period,
                                EXP3Info = EXP3Info)
    }else{
      weight <- CalculateWeight(method,
                                all.event = all.event,
                                sd.reward = sd.reward,
                                reward.family = reward.family,
                                method.par = method.par,
                                period = period,
                                EXP3Info = EXP3Info)
    }

    allWeight[, period] <- as.vector(weight)
    # Allocate this period's pulls to the arms according to `weight`.
    period.pulls <- if (length(npulls.per.period) == 1){
      npullsVec
    }else{
      npulls.per.period[nburnin + period]
    }
    dailyTrial <- rmultinom(1, period.pulls, weight)
    if (is.vector(mean.reward)){
      dailyReward <- apply(cbind(dailyTrial, rewardVec, sd.reward),
                           1, GetReward, reward.family)
      dailyRegret[period] <- sum(dailyTrial * (bestReward - rewardVec))
    }else{
      dailyReward <-
        apply(cbind(dailyTrial, mean.reward[nburnin + period, ], sd.reward),
              1, GetReward, reward.family)
      dailyRegret[period] <-
        sum(dailyTrial *
            (bestReward[nburnin + period] - mean.reward[nburnin + period, ]))
    }
    all.event$trial <- all.event$trial + dailyTrial
    all.event$reward <- all.event$reward + dailyReward
    EXP3Info <- list(prevWeight = weight,   # was `=`; use `<-` for assignment
                     EXP3Trial = dailyTrial,
                     EXP3Reward = dailyReward)
    if (method == "HyperTS"){
      # Credit this period's outcome to the base method that produced it.
      total.reward[method.index] <-
        total.reward[method.index] + sum(dailyReward)
      total.trial[method.index] <- total.trial[method.index] + sum(dailyTrial)
    }
  }
  # Relative regret is 0 by convention when the benchmark regret is 0.
  relativeRegret <- ifelse(equalDailyRegret != 0,
                           dailyRegret / equalDailyRegret, 0)

  # ---- Optional plots and return -------------------------------------------
  # Build the result incrementally instead of a four-branch return chain;
  # element order matches the original for every plot-flag combination.
  result <- list(weight = allWeight, regret = relativeRegret)
  if (isTRUE(weight.plot)){
    weightVector <- c(t(allWeight))
    if (is.vector(mean.reward)){
      names <- rep(sapply(rewardVec, function(x) paste("Mean Reward =", x)),
                   each = nperiod)
    }else{
      names <- rep(sapply(1:n, function(x) paste("Arm", x)), each = nperiod)
    }
    periodSeq <- rep(1:nperiod, n)
    graphData <- data.frame(names, periodSeq, weightVector)
    result$weight.plot.object <- ggplot(graphData,
                                        aes(x = periodSeq,
                                            y = weightVector,
                                            colour = names)) +
      geom_line() + ggtitle("Weight Plot") +
      theme(legend.text = element_text(size = 12, face = "bold"),
            axis.text.y = element_text(size = 12),
            legend.title = element_blank()) +
      labs(y = "weight", x = "period")
  }
  if (isTRUE(regret.plot)){
    periodSeq <- 1:nperiod
    graphData <- data.frame(periodSeq, relativeRegret)
    result$regret.plot.object <- ggplot(graphData,
                                        aes(x = periodSeq,
                                            y = relativeRegret)) +
      geom_line() + ggtitle("Regret Plot") +
      theme(legend.text = element_text(size = 12, face = "bold"),
            axis.text.y = element_text(size = 12),
            legend.title = element_blank()) +
      labs(y = "Relative Regret", x = "period")
  }
  return(result)
}

# Draw the total reward collected from one arm, given the number of pulls and
# the arm's mean reward.
#
# x: a numeric vector c(npulls, mean, sd) -- the sd entry (x[3]) is only read
#    for the Gaussian family.
# reward.family: "Bernoulli", "Gaussian" or "Poisson".
# sd.reward: unused; kept for backward compatibility with existing callers
#    (the sd is taken from x[3] instead).
# Returns a single random draw; NULL (invisibly) for an unknown family, as in
# the original if-chain.
GetReward <- function(x, reward.family, sd.reward){
  switch(reward.family,
         "Bernoulli" = rbinom(n = 1, size = x[1], prob = x[2]),
         # Sum of x[1] i.i.d. Gaussians: mean scales by x[1], sd by sqrt(x[1]).
         "Gaussian" = rnorm(n = 1, mean = x[1] * x[2], sd = x[3] * sqrt(x[1])),
         "Poisson" = rpois(n = 1, lambda = x[1] * x[2]))
}
-------------------------------------------------------------------------------- /R/SimulateMultiplePeriods.R~: -------------------------------------------------------------------------------- 1 | # Copyright 2012-2017 Google 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | #' Simulate strategies for Multi-Armed Bandit in multiple periods 15 | #' 16 | #' This function is aimed to simulate data to run 17 | #' strategies of Multi-Armed Bandit in a sequence of periods. Weight plot 18 | #' and regret plot are provided if needed. In each period there could be 19 | #' multiple pulls and each method can only be applied once. The default setting 20 | #' is that in each period there is only 1 pull, corresponding to continuous 21 | #' updating.
22 | #' 23 | #' Various methods have been implemented. "Epsilon-Greedy" and 24 | #' "Epsilon-Decreasing" allocates \eqn{1 - epsilon} traffic to the arm which has 25 | #' the largest average reward and equally distribute the traffic 26 | #' to other arms. For "Epsilon-Greedy" epsilon in \code{method.par} serves as 27 | #' constant exploration rate . For "Epsilon-Decreasing" epsilon in 28 | #' \code{method.par} serves as exploration rate at period 1, 29 | #' while in period \eqn{t} exploration rate is \eqn{epsilon / t}. 30 | #' See \url{https://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions} 31 | #' for more details about these strategies. 32 | #' 33 | #' "Thompson-Sampling" refers to Beta-Binomial Thompson Sampling using 34 | #' Beta(1, 1) as a prior. "Bayes-Poisson-TS" refers to Poisson-Gamma Thompson 35 | #' Sampling using a Bayesian Generalized Linear 36 | #' Mixed Effects Model to compute weights. "Bayes-Poisson-TS", 37 | #' "Greedy-Bayes-Poisson-TS" and "EXP3-Bayes-Poisson-TS" depends on the package 38 | #' "emre" to compute posterior distribution. For algorithm 39 | #' details, see the paper \url{https://arxiv.org/abs/1602.00047}. 40 | #' 41 | #' UCB (Upper Confidence Bound) is a classical method for Multi-Armed Bandit. 42 | #' For algorithm details, see the paper 43 | #' \url{http://personal.unileoben.ac.at/rortner/Pubs/UCBRev.pdf}. 44 | #' EXP3 is a method which needs to specify exploration rate \code{gamma} and 45 | #' exploitation rate \code{eta}. For algorithm details, see the paper 46 | #' \url{https://cseweb.ucsd.edu/~yfreund/papers/bandits.pdf}. 47 | #' 48 | #' Ensemble methods are also implemented. "Greedy-Thompson-Sampling" and 49 | #' "Greedy-Bayes-Poisson-TS" allocate \eqn{1 - epsilon} traffic to the arm 50 | #' corresponding to the largest 51 | #' Thompson sampling weight and allocate \eqn{epsilon} traffic 52 | #' corresponding to Thompson sampling weights. 
53 | #' Instead of using average reward for each period to update weights in "EXP3", 54 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS" use Thompson sampling 55 | #' weights in the updating formula in "EXP3". 56 | #' "HyperTS" is an ensemble by applying Thompson Sampling to selecting the best 57 | #' method in each period based on previous performance. For algorithm details, 58 | #' see the paper 59 | #' \url{http://yxjiang.github.io/paper/RecSys2014-ensemble-bandit.pdf}. 60 | #' 61 | #' To measure the performance. Regret is computed by summing over the products 62 | #' of the number of pulls on one arm at one period and 63 | #' the difference of the mean reward of that arm compared with the largest one. 64 | #' Relative regret is 65 | #' computed by dividing the regret of a certain method over the regret of the 66 | #' benchmark method that allocates equal weights to each arm 67 | #' throughout all the periods. 68 | #' 69 | #' 70 | #' @param method A character string choosing from "Epsilon-Greedy", 71 | #' "Epsilon-Decreasing", "Thompson-Sampling", 72 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 73 | #' "EXP3-Thompson-Sampling", 74 | #' "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS" and "HyperTS". 75 | #' For details of these methods, see below. Default is "Thompson-Sampling". 76 | #' @param method.par A list of parameters needed for different methods: 77 | #' 78 | #' \code{epsilon}: A real number between 0 and 1; needed for "Epsilon-Greedy", 79 | #' "Epsilon-Decreasing", "Greedy-Thompson-Sampling" and "Greedy-Bayes-Poisson-TS". 80 | #' 81 | #' \code{ndraws.TS}: A positive integer specifying the number of random draws 82 | #' from the posterior; 83 | #' needed for "Thompson-Sampling", "Greedy-Thompson-Sampling" and 84 | #' "EXP3-Thompson-Sampling". Default is 1000. 
85 | #' 86 | #' \code{EXP3}: A list consisting of two real numbers \code{eta} and \code{gamma}; 87 | #' \eqn{eta > 0} and \eqn{0 <= gamma < 1}; needed for "EXP3", 88 | #' "EXP3-Thompson-Sampling" and "EXP3-Bayes-Poisson-TS". 89 | #' 90 | #' \code{BP}: A list consisting of three postive integers \code{iter.BP}, 91 | #' \code{ndraws.BP} and \code{interval.BP}; 92 | #' needed for "Bayes-Poisson-TS", "Greedy-Bayes-Poisson-TS" and 93 | #' "EXP3-Bayes-Poisson-TS"; \code{iter.BP} specifies the number of iterations 94 | #' to compute posterior; 95 | #' \code{ndraws.BP} specifies the number of posterior samples 96 | #' drawn from posterior distribution; \code{interval.BP} is specified to draw 97 | #' each posterior sample from a sample sequence of length \code{interval.BP}. 98 | #' 99 | #' \code{HyperTS}: A list consisting of a vector \code{method.list}, 100 | #' needed for "HyperTS". \code{method.list} is a vector of character strings 101 | #' choosing from "Epsilon-Greedy", "Epsilon-Decreasing", "Thompson-Sampling", 102 | #' "EXP3", "UCB", "Bayes-Poisson-TS", "Greedy-Thompson-Sampling", 103 | #' "EXP3-Thompson-Sampling", "Greedy-Bayes-Poisson-TS" and 104 | #' "EXP3-Bayes-Poisson-TS". "HyperTS" will construct an ensemble consisting all 105 | #' the methods in \code{method.list}. 106 | #' @param nburnin A positive integer specifying the number of periods to 107 | #' allocate each arm equal traffic before applying any strategy. 108 | #' @param nperiod A positive integer specifying the number of periods 109 | #' to apply the strategy. 110 | #' @param npulls.per.period A positive integer or a vector of positive integers. 111 | #' Default value is 1. If \code{npulls.per.period} is a positive integer, 112 | #' the number of pulls is \code{npulls.per.period} for each period. 
113 | #' If \code{npulls.per.period} is a vector, each element represents 114 | #' the number of pulls for one period; the length of \code{npulls.per.period} 115 | #' should be equal to \code{nburnin} + \code{nperiod}. 116 | #' @param reward.family A character string specifying the distribution family 117 | #' of reward. Available distribution includes 118 | #' "Bernoulli", "Poisson" and "Gaussian". If "Gaussian" is chosen to be the 119 | #' reward distribution, 120 | #' a vector of standard deviation should be provided in \code{sd.reward}. 121 | #' @param sd.reward A vector of non-negative numbers specifying 122 | #' standard deviation of each arm's reward distribution if "Gaussian" is chosen 123 | #' to be the reward distribution. Default to be NULL. 124 | #' See \code{reward.family}. 125 | #' @param mean.reward A vector or a matrix of real numbers specifying the mean 126 | #' reward of each arm. If \code{mean.reward} is a vector, each element is the 127 | #' mean reward for each arm and the mean reward of each arm is unchanged 128 | #' throughout all periods (corresponding to the stationary Multi-Armed Bandit). 129 | #' If \code{mean.reward} is a matrix, it should 130 | #' have (\code{nburnin} + \code{nperiod}) rows. The mean reward of each arm 131 | #' could change. Each row represents a mean reward vector for each period 132 | #' (corresponding to nonstationary and adversarial Multi-Armed Bandit). 133 | #' @param weight.plot A logic value with FALSE as default. If TRUE, weight plot 134 | #' object for each arm is returned. 135 | #' @param regret.plot A logic value with FALSE as default. If TRUE, relative 136 | #' regret plot object is returned. 137 | #' 138 | #' @return a list consisting of: 139 | #' \item{weight}{A weight matrix whose each element is the allocated weight 140 | #' for each arm and period. 
Each row represents one arm and each column 141 | #' represents one period.} 142 | #' \item{regret}{A relative regret vector whose each element is relative regret 143 | #' for each period. For definition of relative regret, see above.} 144 | #' \item{weight.plot.object}{If weight.plot = TRUE, a ggplot object is returned.} 145 | #' \item{regret.plot.object}{If regret.plot = TRUE, a ggplot object is returned.} 146 | #' @export 147 | #' @examples 148 | #' ### Simulate Thompson-Sampling 149 | #' set.seed(100) 150 | #' res <- SimulateMultiplePeriods(method = "Thompson-Sampling", 151 | #' method.par = list(ndraws.TS = 1000), 152 | #' nburnin = 30, 153 | #' nperiod = 180, 154 | #' npulls.per.period = 5, 155 | #' reward.family = "Bernoulli", 156 | #' mean.reward = runif(3, 0, 0.1), 157 | #' weight.plot = TRUE) 158 | #' res$weight.plot.object 159 | #' ### Simulate EXP3-Thompson-Sampling 160 | #' set.seed(100) 161 | #' res <- SimulateMultiplePeriods(method = "EXP3-Thompson-Sampling", 162 | #' method.par = list(ndraws.TS = 1000, 163 | #' EXP3 = list(gamma = 0, eta = 0.1)), 164 | #' nburnin = 30, 165 | #' nperiod = 180, 166 | #' npulls.per.period = 5, 167 | #' reward.family = "Bernoulli", 168 | #' mean.reward = runif(3, 0, 0.1), 169 | #' weight.plot = TRUE) 170 | #' res$weight.plot.object 171 | #' ### Simulate ensemble method HyperTS given "Thompson-Sampling", "Epsilon-Greedy" and "Epsilon-Decreasing" 172 | #' set.seed(100) 173 | #' res <- SimulateMultiplePeriods(method = "HyperTS", 174 | #' method.par = list(ndraws.TS = 1000, 175 | #' epsilon = 0.1, 176 | #' HyperTS = list(method.list = c("Thompson-Sampling", 177 | #' "Epsilon-Greedy", 178 | #' "Epsilon-Decreasing"))), 179 | #' nburnin = 30, 180 | #' nperiod = 180, 181 | #' npulls.per.period = 5, 182 | #' reward.family = "Poisson", 183 | #' mean.reward = runif(3, 0, 0.1), 184 | #' weight.plot = TRUE) 185 | #' res$weight.plot.object 186 | 187 | 188 | 189 | 190 | SimulateMultiplePeriods <- function(method = "Thompson-Sampling", 
191 | method.par = list(ndraws.TS = 1000), 192 | nburnin, 193 | nperiod, 194 | reward.family, 195 | mean.reward, 196 | sd.reward = NULL, 197 | npulls.per.period = 1, 198 | weight.plot = FALSE, 199 | regret.plot = FALSE){ 200 | if (! method %in% c("Epsilon-Greedy", "Epsilon-Decreasing", 201 | "Thompson-Sampling","EXP3", "UCB", "Bayes-Poisson-TS", 202 | "Greedy-Thompson-Sampling", "EXP3-Thompson-Sampling", 203 | "Greedy-Bayes-Poisson-TS", "EXP3-Bayes-Poisson-TS", 204 | "HyperTS")){ 205 | stop("Please specify correct method names!") 206 | } 207 | 208 | if (is.vector(mean.reward)){ 209 | rewardVec <- mean.reward 210 | } 211 | 212 | 213 | if (length(npulls.per.period) == 1){ 214 | npullsVec <- npulls.per.period 215 | }else if(length(npulls.per.period) != nburnin + nperiod | 216 | min(npulls.per.period) <= 0){ 217 | stop("Please specify correct number of pulls per period!") 218 | } 219 | 220 | if (is.vector(mean.reward)){ 221 | bestReward <- max(rewardVec) 222 | n <- length(rewardVec) 223 | }else{ 224 | bestReward <- apply(mean.reward, 1, max) 225 | n <- dim(mean.reward)[2] 226 | } 227 | 228 | 229 | if (reward.family == "Gaussian" & 230 | (length(sd.reward) != n | anyNA(sd.reward))){ 231 | stop("Please specify correct standard deviation for Gaussian reward family!") 232 | } 233 | 234 | if (is.vector(mean.reward)){ 235 | if (length(npulls.per.period) == 1){ 236 | burninTrial <- rmultinom(1, nburnin * npullsVec, rep(1 / n, n)) 237 | }else{ 238 | burninTrial <- rmultinom(1, sum(npulls.per.period[1:nburnin]), rep(1 / n, n)) 239 | } 240 | burninReward <- apply(cbind(burninTrial, rewardVec, sd.reward), 1, 241 | GetReward, reward.family) 242 | }else{ 243 | burninTrial <- rep(0, n) 244 | burninReward <- rep(0, n) 245 | if (length(npulls.per.period) == 1){ 246 | for (period in 1:nburnin){ 247 | tempTrial <- rmultinom(1, npullsVec, rep(1 / n, n)) 248 | tempReward <- apply(cbind(tempTrial, mean.reward[period, ], sd.reward), 249 | 1, GetReward, reward.family) 250 | burninTrial <- 
burninTrial + tempTrial 251 | burninReward <- burninReward + tempReward 252 | } 253 | }else{ 254 | for (period in 1:nburnin){ 255 | tempTrial <- rmultinom(1, npulls.per.period[period], rep(1 / n, n)) 256 | tempReward <- apply(cbind(tempTrial, mean.reward[period, ], sd.reward), 257 | 1, GetReward, reward.family) 258 | burninTrial <- burninTrial + tempTrial 259 | burninReward <- burninReward + tempReward 260 | } 261 | } 262 | } 263 | burninEvent <- data.frame(trial = burninTrial, reward = burninReward) 264 | 265 | 266 | equalDailyRegret <- rep(0, nperiod) 267 | for (period in 1:nperiod){ 268 | if (length(npulls.per.period) == 1){ 269 | dailyTrial <- rep(npullsVec / n, n) 270 | }else{ 271 | dailyTrial <- rep(npulls.per.period[nburnin + period] / n, n) 272 | } 273 | if (is.vector(mean.reward)){ 274 | equalDailyRegret[period] <- sum(dailyTrial * (bestReward - rewardVec)) 275 | }else{ 276 | equalDailyRegret[period] <- 277 | sum(dailyTrial * (bestReward[nburnin + period] - mean.reward[nburnin + period, ])) 278 | } 279 | } 280 | 281 | 282 | allWeight <- cbind() 283 | all.event <- burninEvent 284 | dailyRegret <- rep(0, nperiod) 285 | 286 | weight <- as.vector(CalculateWeight(method = "Thompson-Sampling", 287 | sd.reward = sd.reward, 288 | reward.family = reward.family, 289 | all.event = all.event, 290 | method.par = list(ndraws.TS = 1000))) 291 | EXP3Info <- list(prevWeight = weight, 292 | EXP3Trial = burninTrial, 293 | EXP3Reward = burninReward) 294 | 295 | if (method == "HyperTS"){ 296 | nmethod <- length(method.par$HyperTS$method.list) 297 | total.reward <- rep(0, nmethod) 298 | if (reward.family == "Bernoulli"){ 299 | total.trial <- rep(0, nmethod) 300 | } 301 | if (reward.family == "Gaussian" | reward.family == "Poisson"){ 302 | total.trial <- rep(1, nmethod) 303 | } 304 | } 305 | 306 | 307 | 308 | 309 | for (period in 1:nperiod){ 310 | if (method == "HyperTS"){ 311 | ndraws <- 1000 312 | ans <- matrix(nrow = ndraws, ncol = nmethod) 313 | if (reward.family == 
"Bernoulli"){ 314 | for (i in 1:nmethod) ans[ ,i] <- rbeta(ndraws, total.reward[i] + 1, 315 | total.trial[i] - total.reward[i] + 1) 316 | } 317 | if (reward.family == "Gaussian"){ 318 | for (i in 1:nmethod) ans[ ,i] <- rnorm(ndraws, 319 | mean = total.reward[i] / total.trial[i], 320 | sd = sd.reward[i] / sqrt(total.trial[i])) 321 | } 322 | if (reward.family == "Poisson"){ 323 | for (i in 1:nmethod) ans[ ,i] <- 324 | rgamma(ndraws, shape = total.reward[i] + 1, 325 | scale = 1 / total.trial[i]) 326 | } 327 | method.index <- which.max(as.vector(table(factor(max.col(ans), 328 | levels = 1:nmethod)))) 329 | method.chosen <- method.par$HyperTS$method.list[method.index] 330 | weight <- CalculateWeight(method.chosen, 331 | all.event = all.event, 332 | sd.reward = sd.reward, 333 | reward.family = reward.family, 334 | method.par = method.par, 335 | period = period, 336 | EXP3Info = EXP3Info) 337 | } 338 | 339 | if (method != "HyperTS"){ 340 | weight <- CalculateWeight(method, 341 | all.event = all.event, 342 | sd.reward = sd.reward, 343 | reward.family = reward.family, 344 | method.par = method.par, 345 | period = period, 346 | EXP3Info = EXP3Info) 347 | } 348 | 349 | allWeight <- cbind(allWeight, weight) 350 | if (length(npulls.per.period) == 1){ 351 | dailyTrial <- rmultinom(1, npullsVec, weight) 352 | }else{ 353 | dailyTrial <- rmultinom(1, npulls.per.period[nburnin + period], weight) 354 | } 355 | if (is.vector(mean.reward)){ 356 | dailyReward <- apply(cbind(dailyTrial, rewardVec, sd.reward), 357 | 1, GetReward, reward.family) 358 | }else{ 359 | dailyReward <- 360 | apply(cbind(dailyTrial, mean.reward[nburnin + period, ], sd.reward), 361 | 1, GetReward, reward.family) 362 | } 363 | 364 | all.event$trial <- all.event$trial + dailyTrial 365 | all.event$reward <- all.event$reward + dailyReward 366 | if (is.vector(mean.reward)){ 367 | dailyRegret[period] <- sum(dailyTrial * (bestReward - rewardVec)) 368 | }else{ 369 | dailyRegret[period] <- 370 | sum(dailyTrial * 371 | 
(bestReward[nburnin + period] - mean.reward[nburnin + period, ])) 372 | } 373 | EXP3Info = list(prevWeight = weight, 374 | EXP3Trial = dailyTrial, 375 | EXP3Reward = dailyReward) 376 | 377 | if (method == "HyperTS"){ 378 | total.reward[method.index] <- total.reward[method.index] + sum(dailyReward) 379 | total.trial[method.index] <- total.trial[method.index] + sum(dailyTrial) 380 | } 381 | } 382 | relativeRegret <- ifelse(equalDailyRegret != 0, 383 | dailyRegret / equalDailyRegret, 0) 384 | 385 | if (weight.plot == TRUE){ 386 | weightVector <- c(t(allWeight)) 387 | if (is.vector(mean.reward)){ 388 | names <- rep(sapply(rewardVec, function(x) paste("Mean Reward =", x)), 389 | each = nperiod) 390 | }else{ 391 | names <- rep(sapply(1:n, function(x) paste("Arm", x)), each = nperiod) 392 | } 393 | 394 | periodSeq <- rep(1:nperiod, n) 395 | graphData <- data.frame(names, periodSeq, weightVector) 396 | weight.plot.object <- ggplot(graphData, 397 | aes(x = periodSeq, 398 | y = weightVector, 399 | colour = names)) + 400 | geom_line() + ggtitle("Weight Plot") + 401 | theme(legend.text = element_text(size = 12, face = "bold"), 402 | axis.text.y = element_text(size = 12), 403 | legend.title = element_blank()) + 404 | labs(y = "weight", x = "period") 405 | } 406 | if (regret.plot == TRUE){ 407 | periodSeq <- 1:nperiod 408 | graphData <- data.frame(periodSeq, relativeRegret) 409 | regret.plot.object <- ggplot(graphData, 410 | aes(x = periodSeq, 411 | y = relativeRegret)) + 412 | geom_line() + ggtitle("Regret Plot") + 413 | theme(legend.text = element_text(size = 12, face = "bold"), 414 | axis.text.y = element_text(size = 12), 415 | legend.title = element_blank()) + 416 | labs(y = "Relative Regret", x = "period") 417 | } 418 | 419 | if (weight.plot == TRUE & regret.plot == TRUE){ 420 | return(list(weight = allWeight, 421 | regret = relativeRegret, 422 | weight.plot.object = weight.plot.object, 423 | regret.plot.object = regret.plot.object)) 424 | } 425 | if (weight.plot == FALSE & 
regret.plot == TRUE){ 426 | return(list(weight = allWeight, 427 | regret = relativeRegret, 428 | regret.plot.object = regret.plot.object)) 429 | } 430 | if (weight.plot == TRUE & regret.plot == FALSE){ 431 | return(list(weight = allWeight, 432 | regret = relativeRegret, 433 | weight.plot.object = weight.plot.object)) 434 | } 435 | if (weight.plot == FALSE & regret.plot == FALSE){ 436 | return(list(weight = allWeight, regret = relativeRegret)) 437 | } 438 | } 439 | 440 | 441 | 442 | 443 | GetReward <- function(x, reward.family, sd.reward){ 444 | if (reward.family == "Bernoulli") { 445 | return(rbinom(n = 1, size = x[1], prob = x[2])) 446 | } 447 | if (reward.family == "Gaussian") { 448 | return(rnorm(n = 1, mean = x[1] * x[2], sd = x[3] * sqrt(x[1]))) 449 | } 450 | if (reward.family == "Poisson"){ 451 | return(rpois(n = 1, lambda = x[1] * x[2])) 452 | } 453 | } 454 | 455 | 456 | 457 | 458 | 459 | --------------------------------------------------------------------------------