├── .gitignore ├── README.md ├── SMPyBandits ├── Arms │ ├── Arm.py │ ├── Bernoulli.py │ ├── Binomial.py │ ├── Constant.py │ ├── DiscreteArm.py │ ├── Exponential.py │ ├── Gamma.py │ ├── Gaussian.py │ ├── Poisson.py │ ├── README.md │ ├── RestedRottingArm.py │ ├── RestlessArm.py │ ├── UniformArm.py │ ├── __init__.py │ ├── kullback.py │ └── usenumba.py ├── C_Interface │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── Sample.py │ ├── test.c │ ├── test.cpp │ ├── test.py │ ├── test2.cpp │ └── test_sub.cpp ├── Environment │ ├── CollisionModels.py │ ├── Evaluator.py │ ├── EvaluatorMultiPlayers.py │ ├── EvaluatorSparseMultiPlayers.py │ ├── MAB.py │ ├── MAB_rotting.py │ ├── README.md │ ├── Result.py │ ├── ResultMultiPlayers.py │ ├── StrategicEvaluator.py │ ├── StrategicResult.py │ ├── __init__.py │ ├── fairnessMeasures.py │ ├── memory_consumption.py │ ├── notify.py │ ├── plot_Cmu_HOI.py │ ├── plotsettings.py │ ├── pykov.py │ ├── sortedDistance.py │ ├── usejoblib.py │ ├── usenumba.py │ └── usetqdm.py ├── Experiment │ ├── Seznec19a_Fig1 │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ └── style.mplstyle │ ├── Seznec19a_Fig2 │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ └── style.mplstyle │ ├── Seznec19a_Fig3 │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ └── style.mplstyle │ ├── Seznec19b_Fig1 │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ ├── prepare_yahoo_data.py │ │ └── style.mplstyle │ ├── Seznec_EFF │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ ├── plot_delay.py │ │ └── style.mplstyle │ └── Seznec_asymptotic │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ └── style.mplstyle ├── LICENSE ├── Policies │ ├── .gitignore │ ├── AdBandits.py │ ├── AdSwitch.py │ ├── AdSwitchNew.py │ ├── Aggregator.py │ ├── ApproximatedFHGittins.py │ ├── BESA.py │ ├── BasePolicy.py │ ├── BaseWrapperPolicy.py │ ├── BayesUCB.py │ ├── BayesianIndexPolicy.py │ ├── BoltzmannGumbel.py │ ├── C │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── kullback_py3.c │ │ ├── setup.py │ │ └── setup.py3 │ ├── CD_UCB.py │ ├── CORRAL.py │ ├── CPUCB.py │ ├── CUSUM_UCB.py │ ├── DMED.py │ ├── DiscountedBayesianIndexPolicy.py │ ├── DiscountedThompson.py │ ├── DiscountedUCB.py │ ├── DoublingTrickWrapper.py │ ├── EmpiricalMeans.py │ ├── EpsilonGreedy.py │ ├── Exp3.py │ ├── Exp3PlusPlus.py │ ├── Exp3R.py │ ├── Exp3S.py │ ├── Experimentals │ │ ├── .gitignore │ │ ├── BlackBoxOpt.py │ │ ├── KLempUCB.py │ │ ├── Makefile │ │ ├── README.md │ │ ├── ThompsonRobust.py │ │ ├── UCBcython.pyx │ │ ├── UCBjulia.jl │ │ ├── UCBjulia.py │ │ ├── UCBlog10.py │ │ ├── UCBlog10alpha.py │ │ ├── UCBoost_cython.pyx │ │ ├── UCBoost_faster.py │ │ ├── UCBoost_faster_cython.pyx │ │ ├── UCBwrong.py │ │ ├── UnsupervisedLearning.py │ │ ├── __init__.py │ │ ├── klUCBlog10.py │ │ ├── klUCBloglog10.py │ │ └── setup.py │ ├── ExploreThenCommit.py │ ├── FEWA.py │ ├── GLR_UCB.py │ ├── GenericAggregation.py │ ├── GreedyOracle.py │ ├── H_UCB.py │ ├── Hedge.py │ ├── IMED.py │ ├── IndexPolicy.py │ ├── LM_DSEE.py │ ├── LearnExp.py │ ├── MEGA.py │ ├── MOSS.py │ ├── MOSSAnytime.py │ ├── MOSSExperimental.py │ ├── MOSSH.py │ ├── Makefile │ ├── Monitored_UCB.py │ ├── MusicalChair.py │ ├── MusicalChairNoSensing.py │ ├── OCUCB.py │ ├── OCUCBH.py │ ├── OSSB.py │ ├── OracleSequentiallyRestartPolicy.py │ ├── PHE.py │ ├── PRH_UCB.py │ ├── Posterior │ │ ├── Beta.py │ │ ├── DiscountedBeta.py │ │ ├── Gamma.py │ │ ├── Gauss.py │ │ ├── Posterior.py │ │ ├── README.md │ │ ├── __init__.py │ │ └── with_proba.py │ ├── ProbabilityPursuit.py │ ├── 
RAWUCB.py │ ├── RCB.py │ ├── README.md │ ├── RH_UCB.py │ ├── RH_UCB_Temp.py │ ├── RandomizedIndexPolicy.py │ ├── SIC_MMAB.py │ ├── SWA.py │ ├── SWHash_UCB.py │ ├── Sampled_R_UCB.py │ ├── SlidingWindowRestart.py │ ├── SlidingWindowUCB.py │ ├── Softmax.py │ ├── SparseUCB.py │ ├── SparseWrapper.py │ ├── SparseklUCB.py │ ├── StrategicBasePolicy.py │ ├── StrategicIndexPolicy.py │ ├── StrategicUCB2PhaseRobustDeprecated.py │ ├── SuccessiveElimination.py │ ├── TakeFixedArm.py │ ├── TakeRandomFixedArm.py │ ├── Thompson.py │ ├── TrekkingTSN.py │ ├── TsallisInf.py │ ├── UCB.py │ ├── UCBH.py │ ├── UCBV.py │ ├── UCBVtuned.py │ ├── UCBalpha.py │ ├── UCBdagger.py │ ├── UCBimproved.py │ ├── UCBmin.py │ ├── UCBoost.py │ ├── UCBplus.py │ ├── UCBrandomInit.py │ ├── Uniform.py │ ├── UniformOnSome.py │ ├── WrapRange.py │ ├── __init__.py │ ├── _test_for_BESA_core_function.py │ ├── klUCB.py │ ├── klUCBH.py │ ├── klUCBHPlus.py │ ├── klUCBPlus.py │ ├── klUCBPlusPlus.py │ ├── klUCB_forGLR.py │ ├── klUCBloglog.py │ ├── klUCBloglog_forGLR.py │ ├── klUCBswitch.py │ ├── kullback.py │ ├── kullback.pydoctest.txt │ ├── kullback_cython.pyx │ ├── setup.py │ ├── usenumba.py │ └── with_proba.py ├── PoliciesMultiPlayers │ ├── ALOHA.py │ ├── BaseCentralizedPolicy.py │ ├── BaseMPPolicy.py │ ├── CentralizedCycling.py │ ├── CentralizedFixed.py │ ├── CentralizedIMP.py │ ├── CentralizedMultiplePlay.py │ ├── ChildPointer.py │ ├── DepRound.py │ ├── EstimateM.py │ ├── OracleFair.py │ ├── OracleNotFair.py │ ├── README.md │ ├── RandTopM.py │ ├── RandTopMEst.py │ ├── Scenario1.py │ ├── Selfish.py │ ├── __init__.py │ ├── rhoCentralized.py │ ├── rhoEst.py │ ├── rhoLearn.py │ ├── rhoLearnEst.py │ ├── rhoLearnExp3.py │ ├── rhoRand.py │ ├── rhoRandALOHA.py │ ├── rhoRandRand.py │ ├── rhoRandRotating.py │ ├── rhoRandSticky.py │ └── with_proba.py ├── README.rst ├── __init__.py ├── complete_tree_exploration_for_MP_bandits.py ├── configuration.py ├── configuration_all_singleplayer.py ├── configuration_comparing_aggregation_algorithms.py ├── configuration_comparing_doubling_algorithms.py ├── configuration_markovian.py ├── configuration_multiplayers.py ├── configuration_multiplayers_nonstationary.py ├── configuration_multiplayers_with_aggregation.py ├── configuration_nonstationary.py ├── configuration_sparse.py ├── configuration_sparse_multiplayers.py ├── env_client.cpp ├── env_client.py ├── example_of_configuration_multiplayers.py ├── example_of_configuration_singleplayer.py ├── example_of_main_multiplayers_more.py ├── example_of_main_singleplayer.py ├── include │ ├── README.md │ ├── docopt.cpp │ ├── docopt.h │ ├── docopt_private.h │ ├── docopt_util.h │ ├── docopt_value.h │ └── subprocess.hpp ├── main.py ├── main_multiplayers.py ├── main_multiplayers_more.py ├── main_sparse_multiplayers.py ├── policy_server.py ├── save_configuration_for_reproducibility.py └── very_simple_configuration.py ├── docker └── Dockerfile └── strategic_scripts ├── draw_fig.py ├── help_experiment.py ├── main.py ├── run_experiment.py ├── run_h_ucb.sh ├── run_rh_ucb.sh ├── run_sampled_r_ucb.sh ├── run_ucb.sh └── setups ├── N100_05X100.json ├── N100_05X200.json ├── N100_05X300.json ├── N100_05X400.json ├── N100_05X500.json ├── N100_09X100.json ├── N100_09X200.json ├── N100_09X300.json ├── N100_09X400.json ├── N100_09X500.json ├── N100_default.json ├── N100_rh_ucb_best_10_100_replicate1000X3.json ├── N100_single_origin_arm1000X1.json └── N100_single_origin_arm1000X4.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 
cache 2 | __pycache__/ 3 | 4 | # Visual Studio Code configuration 5 | .vscode/ 6 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/Arm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Base class for an arm class.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.6" 7 | 8 | 9 | class Arm(object): 10 | """ Base class for an arm class.""" 11 | 12 | def __init__(self, lower=0., amplitude=1.): 13 | """ Base class for an arm class.""" 14 | self.lower = lower #: Lower value of rewards 15 | self.amplitude = amplitude #: Amplitude of value of rewards 16 | self.min = lower #: Lower value of rewards 17 | self.max = lower + amplitude #: Higher value of rewards 18 | 19 | # --- Printing 20 | 21 | # This decorator @property makes this method an attribute, cf. https://docs.python.org/3/library/functions.html#property 22 | @property 23 | def lower_amplitude(self): 24 | """(lower, amplitude)""" 25 | if hasattr(self, 'lower') and hasattr(self, 'amplitude'): 26 | return self.lower, self.amplitude 27 | elif hasattr(self, 'min') and hasattr(self, 'max'): 28 | return self.min, self.max - self.min 29 | else: 30 | raise NotImplementedError("This method lower_amplitude() has to be implemented in the class inheriting from Arm.") 31 | 32 | # --- Printing 33 | 34 | def __str__(self): 35 | return self.__class__.__name__ 36 | 37 | def __repr__(self): 38 | return "{}({})".format(self.__class__.__name__, self.__dir__) 39 | 40 | # --- Random samples 41 | 42 | def draw(self, t=None): 43 | """ Draw one random sample.""" 44 | raise NotImplementedError("This method draw(t) has to be implemented in the class inheriting from Arm.") 45 | 46 | def oracle_draw(self, t=None): 47 | """ Draw the arm as usual (to update its internal state), but return its mean instead of the sampled reward.""" 48 | assert hasattr(self, "mean"), "oracle_draw() can only be used on an Arm that has a mean attribute." 49 | mean = self.mean 50 | self.draw(t) 51 | return mean 52 | 53 | def set_mean_param(self, mean): 54 | raise NotImplementedError("This method set_mean_param(mean) has to be implemented in the class inheriting from Arm.") 55 | 56 | 57 | 58 | def draw_nparray(self, shape=(1,)): 59 | """ Draw a numpy array of random samples, of a certain shape.""" 60 | raise NotImplementedError("This method draw_nparray(shape) has to be implemented in the class inheriting from Arm.") 61 | 62 | # --- Lower bound 63 | 64 | @staticmethod 65 | def kl(x, y): 66 | """ The kl(x, y) to use for this arm.""" 67 | raise NotImplementedError("This method kl(x, y) has to be implemented in the class inheriting from Arm.") 68 | 69 | @staticmethod 70 | def oneLR(mumax, mu): 71 | """ One term of the Lai & Robbins lower bound for this arm: (mumax - mu) / KL(mu, mumax). """ 72 | raise NotImplementedError("This method oneLR(mumax, mu) has to be implemented in the class inheriting from Arm.") 73 | 74 | @staticmethod 75 | def oneHOI(mumax, mu): 76 | """ One term for the HOI factor for this arm.""" 77 | return 1 - (mumax - mu) 78 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/Bernoulli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Bernoulli distributed arm. 
3 | 4 | Example of creating an arm: 5 | 6 | >>> import random; import numpy as np 7 | >>> random.seed(0); np.random.seed(0) 8 | >>> B03 = Bernoulli(0.3) 9 | >>> B03 10 | B(0.3) 11 | >>> B03.mean 12 | 0.3 13 | 14 | Examples of sampling from an arm: 15 | 16 | >>> B03.draw() 17 | 0 18 | >>> B03.draw_nparray(20) 19 | array([1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 20 | 1., 1., 1.]) 21 | """ 22 | from __future__ import division, print_function # Python 2 compatibility 23 | 24 | __author__ = "Lilian Besson" 25 | __version__ = "0.6" 26 | 27 | import numpy as np 28 | from numpy.random import binomial 29 | 30 | # Local imports 31 | try: 32 | from .Arm import Arm 33 | from .kullback import klBern 34 | except ImportError: 35 | from Arm import Arm 36 | from kullback import klBern 37 | 38 | 39 | class Bernoulli(Arm): 40 | """ Bernoulli distributed arm.""" 41 | 42 | def __init__(self, probability): 43 | """New arm.""" 44 | assert 0 <= probability <= 1, "Error, the parameter probability for Bernoulli class has to be in [0, 1]." # DEBUG 45 | self.probability = probability #: Parameter p for this Bernoulli arm 46 | self.mean = probability #: Mean for this Bernoulli arm 47 | 48 | # --- Random samples 49 | 50 | def draw(self, t=None): 51 | """ Draw one random sample.""" 52 | return binomial(1, self.probability) 53 | # return np.asarray(binomial(1, self.probability), dtype=float) 54 | 55 | def draw_nparray(self, shape=(1,)): 56 | """ Draw a numpy array of random samples, of a certain shape.""" 57 | return np.asarray(binomial(1, self.probability, shape), dtype=float) 58 | 59 | def set_mean_param(self, probability): 60 | self.probability = self.mean = probability 61 | 62 | # --- Printing 63 | 64 | # This decorator @property makes this method an attribute, cf. https://docs.python.org/3/library/functions.html#property 65 | @property 66 | def lower_amplitude(self): 67 | """(lower, amplitude)""" 68 | return 0., 1. 69 | 70 | def __str__(self): 71 | return "Bernoulli" 72 | 73 | def __repr__(self): 74 | return "B({:.3g})".format(self.probability) 75 | 76 | # --- Lower bound 77 | 78 | @staticmethod 79 | def kl(x, y): 80 | """ The kl(x, y) to use for this arm.""" 81 | return klBern(x, y) 82 | 83 | @staticmethod 84 | def oneLR(mumax, mu): 85 | """ One term of the Lai & Robbins lower bound for Bernoulli arms: (mumax - mu) / KL(mu, mumax). """ 86 | return (mumax - mu) / klBern(mu, mumax) 87 | 88 | 89 | # Only export and expose the class defined here 90 | __all__ = ["Bernoulli"] 91 | 92 | 93 | # --- Debugging 94 | 95 | if __name__ == "__main__": 96 | # Code for debugging purposes. 97 | from doctest import testmod 98 | print("\nTesting automatically all the docstring written in each functions of this module :") 99 | testmod(verbose=True) 100 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/Constant.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Arm with a constant reward. Useful for debugging. 
3 | 4 | Example of creating an arm: 5 | 6 | >>> C013 = Constant(0.13) 7 | >>> C013 8 | Constant(0.13) 9 | >>> C013.mean 10 | 0.13 11 | 12 | Examples of sampling from an arm: 13 | 14 | >>> C013.draw() 15 | 0.13 16 | >>> C013.draw_nparray(3) 17 | array([0.13, 0.13, 0.13]) 18 | """ 19 | from __future__ import division, print_function # Python 2 compatibility 20 | 21 | __author__ = "Lilian Besson" 22 | __version__ = "0.6" 23 | 24 | import numpy as np 25 | 26 | # Local imports 27 | try: 28 | from .Arm import Arm 29 | except ImportError: 30 | from Arm import Arm 31 | 32 | 33 | class Constant(Arm): 34 | """ Arm with a constant reward. Useful for debugging. 35 | 36 | - `constant_reward` is the constant reward, 37 | - `lower`, `amplitude` default to `floor(constant_reward)`, `1` (so the ) 38 | 39 | >>> arm_0_5 = Constant(0.5) 40 | >>> arm_0_5.draw() 41 | 0.5 42 | >>> arm_0_5.draw_nparray((3, 2)) 43 | array([[0.5, 0.5], 44 | [0.5, 0.5], 45 | [0.5, 0.5]]) 46 | """ 47 | 48 | def __init__(self, constant_reward=0.5, lower=0., amplitude=1.): 49 | """ New arm.""" 50 | constant_reward = float(constant_reward) 51 | self.constant_reward = constant_reward #: Constant value of rewards 52 | lower = min(lower, np.floor(constant_reward)) 53 | self.lower = lower #: Known lower value of rewards 54 | self.amplitude = amplitude #: Known amplitude of rewards 55 | self.mean = constant_reward #: Mean for this Constant arm 56 | 57 | # --- Random samples 58 | 59 | def draw(self, t=None): 60 | """ Draw one constant sample. The parameter t is ignored in this Arm.""" 61 | return self.constant_reward 62 | 63 | def draw_nparray(self, shape=(1,)): 64 | """ Draw a numpy array of constant samples, of a certain shape.""" 65 | return np.full(shape, self.constant_reward) 66 | 67 | def set_mean_param(self, mean): 68 | self.mean = mean 69 | 70 | # --- Printing 71 | 72 | def __str__(self): 73 | return "Constant" 74 | 75 | def __repr__(self): 76 | return "Constant({:.3g})".format(self.constant_reward) 77 | 78 | # --- Lower bound 79 | 80 | @staticmethod 81 | def kl(x, y): 82 | """ The `kl(x, y) = abs(x - y)` to use for this arm.""" 83 | return abs(x - y) 84 | 85 | @staticmethod 86 | def oneLR(mumax, mu): 87 | """ One term of the Lai & Robbins lower bound for Constant arms: (mumax - mu) / KL(mu, mumax). """ 88 | return (mumax - mu) / abs(mumax - mu) 89 | 90 | 91 | __all__ = ["Constant"] 92 | 93 | 94 | # --- Debugging 95 | 96 | if __name__ == "__main__": 97 | # Code for debugging purposes. 98 | from doctest import testmod 99 | print("\nTesting automatically all the docstring written in each functions of this module :") 100 | testmod(verbose=True) 101 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/README.md: -------------------------------------------------------------------------------- 1 | # [Arms](https://smpybandits.github.io/docs/Arms.html) 2 | > See here the documentation: [docs/Arms](https://smpybandits.github.io/docs/Arms.html) 3 | 4 | Arms : contains different types of bandit arms: 5 | [`Constant`](Constant.py), [`UniformArm`](UniformArm.py), [`Bernoulli`](Bernoulli.py), [`Binomial`](Binomial.py), [`Poisson`](Poisson.py), [`Gaussian`](Gaussian.py), [`Exponential`](Exponential.py), [`Gamma`](Gamma.py). 
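Adding a new arm type only requires implementing the small interface defined in [`Arm.py`](Arm.py) (a `mean` attribute, `draw()` and `draw_nparray()`, and optionally `kl`, `oneLR` and `set_mean_param()`). A minimal sketch of a custom arm (purely illustrative, not part of the package; it assumes the `Arm` base class is exposed at the package level like the other arm classes):

```python
import numpy as np
from SMPyBandits.Arms import Arm  # abstract base class defining the common interface

class Triangular(Arm):
    """ Illustrative arm: rewards drawn from a triangular distribution on [0, 1]."""

    def __init__(self, mode=0.5):
        super(Triangular, self).__init__(lower=0., amplitude=1.)
        self.mode = mode  #: Mode of the triangular distribution
        self.mean = (0. + mode + 1.) / 3.  #: Mean of a triangular distribution on [0, 1]

    def draw(self, t=None):
        """ Draw one random sample (the parameter t is ignored)."""
        return np.random.triangular(0., self.mode, 1.)

    def draw_nparray(self, shape=(1,)):
        """ Draw a numpy array of random samples, of a certain shape."""
        return np.random.triangular(0., self.mode, 1., size=shape)
```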
6 | 7 | Each arm class follows the same interface: 8 | 9 | ```python 10 | >>> my_arm = Arm(params) 11 | >>> my_arm.mean 12 | 0.5 13 | >>> my_arm.draw() # one random draw 14 | 0.0 15 | >>> my_arm.draw_nparray(20) # or ((3, 10)), many draws 16 | array([ 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 17 | 1., 0., 0., 0., 1., 1., 1.]) 18 | ``` 19 | 20 | 21 | Also the [`__init__.py`](__init__.py) file contains: 22 | 23 | - `uniformMeans`, to generate uniformly spaced means of arms. 24 | - `uniformMeansWithSparsity`, to generate uniformly spaced means of arms, with sparsity constraints. 25 | - `randomMeans`, to generate randomly spaced means of arms. 26 | - `randomMeansWithGapBetweenMbestMworst`, to generate randomly spaced means of arms, with a constraint on the gap between the M-best arms and the (K-M)-worst arms. 27 | - `randomMeans`, to generate randomly spaced means of arms. 28 | - `shuffled`, to return a shuffled version of a list. 29 | - Utility functions `array_from_str`, `list_from_str` and `tuple_from_str` to obtain a `numpy.ndarray`, a `list` or a `tuple` from a string (used for the CLI env variables interface). 30 | - `optimal_selection_probabilities`. -------------------------------------------------------------------------------- /SMPyBandits/Arms/RestedRottingArm.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien Seznec 3 | Rested rotting arm, i.e. an arm whose mean value decays at each pull 4 | """ 5 | 6 | try: 7 | from . import Arm, Bernoulli, Binomial, UnboundedExponential, UnboundedGaussian, Constant, UnboundedPoisson 8 | except ImportError: 9 | from Arm import Arm 10 | from Bernoulli import Bernoulli 11 | from Binomial import Binomial 12 | from Exponential import UnboundedExponential 13 | from Gaussian import UnboundedGaussian 14 | from Constant import Constant 15 | from Poisson import UnboundedPoisson 16 | 17 | class RestedRottingArm(Arm): 18 | def __init__(self, decayingFunction, staticArm): 19 | self.decayingFunction = decayingFunction 20 | # decayingFunction provides the mean of the arm after n pulls, EXCEPT for truncated distributions where it is the mean of the untruncated distribution 21 | self.arm = staticArm 22 | self.pull_count = 0 23 | self.arm.set_mean_param(self.decayingFunction(self.pull_count)) 24 | self.mean = self.arm.mean 25 | 26 | def draw(self, t=None): 27 | self.arm.set_mean_param(self.decayingFunction(self.pull_count)) 28 | current_mean = self.mean 29 | self.mean = self.arm.mean 30 | draw = self.arm.draw(t) 31 | self.pull_count += 1 32 | self.arm.set_mean_param(self.decayingFunction(self.pull_count)) 33 | self.mean = self.arm.mean 34 | assert current_mean >= self.mean, "The mean of a rested rotting arm cannot increase." 
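        # Note: the two set_mean_param() calls above first refresh the underlying arm for the *current* pull
        # (the sample returned below), then advance it to the mean of the *next* pull; since a rested rotting
        # arm decays with each pull, the refreshed mean can never exceed the previous one (checked by the assert).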
35 | return draw 36 | 37 | 38 | class RestedRottingBernoulli(RestedRottingArm): 39 | def __init__(self, decayingFunction): 40 | arm = Bernoulli(0) 41 | super(RestedRottingBernoulli, self).__init__(decayingFunction, arm) 42 | 43 | 44 | class RestedRottingBinomial(RestedRottingArm): 45 | def __init__(self, decayingFunction, draws=1): 46 | arm = Binomial(0, draws) 47 | super(RestedRottingBinomial, self).__init__(decayingFunction, arm) 48 | 49 | 50 | class RestedRottingConstant(RestedRottingArm): 51 | def __init__(self, decayingFunction): 52 | arm = Constant(0) 53 | super(RestedRottingConstant, self).__init__(decayingFunction, arm) 54 | 55 | 56 | class RestedRottingExponential(RestedRottingArm): 57 | def __init__(self, decayingFunction): 58 | arm = UnboundedExponential(1) 59 | super(RestedRottingExponential, self).__init__(decayingFunction, arm) 60 | 61 | 62 | class RestedRottingGaussian(RestedRottingArm): 63 | def __init__(self, decayingFunction, sigma=1): 64 | arm = UnboundedGaussian(0, sigma) 65 | super(RestedRottingGaussian, self).__init__(decayingFunction, arm) 66 | 67 | 68 | class RestedRottingPoisson(RestedRottingArm): 69 | def __init__(self, decayingFunction, sigma=1): 70 | arm = UnboundedPoisson(0) 71 | super(RestedRottingPoisson, self).__init__(decayingFunction, arm) 72 | 73 | 74 | 75 | if __name__ == '__main__': 76 | rotting_bernoulli = RestedRottingBernoulli(lambda n: 0 if n > 10 else 1) 77 | rotting_gaussian = RestedRottingGaussian(lambda n: 0 if n > 10 else 1) 78 | print([rotting_gaussian.draw() for _ in range(50)]) 79 | print([rotting_bernoulli.draw() for _ in range(50)]) -------------------------------------------------------------------------------- /SMPyBandits/Arms/RestlessArm.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien Seznec 3 | Restless arm, i.e. arms with mean value which change at each round 4 | """ 5 | try: 6 | from . import Arm, Bernoulli, Binomial, UnboundedExponential, UnboundedGaussian, Constant, UnboundedPoisson 7 | except ImportError: 8 | from Arm import Arm 9 | from Bernoulli import Bernoulli 10 | from Binomial import Binomial 11 | from Exponential import UnboundedExponential 12 | from Gaussian import UnboundedGaussian 13 | from Constant import Constant 14 | from Poisson import UnboundedPoisson 15 | 16 | from math import sin 17 | 18 | 19 | class RestlessArm(Arm): 20 | def __init__(self, rewardFunction, staticArm): 21 | self.reward = rewardFunction 22 | # It provides the mean of the arm after n pulls. 
EXCEPT for truncated distributions where it is the mean of the untruncated distribution 23 | self.arm = staticArm 24 | self.mean = self.arm.mean 25 | 26 | def draw(self, t): 27 | self.arm.set_mean_param(self.reward(t)) 28 | self.mean = self.arm.mean 29 | draw = self.arm.draw(t) 30 | return draw 31 | 32 | 33 | class RestlessBernoulli(RestlessArm): 34 | def __init__(self, rewardFunction): 35 | arm = Bernoulli(0) 36 | super(RestlessBernoulli, self).__init__(rewardFunction, arm) 37 | 38 | 39 | class RestlessBinomial(RestlessArm): 40 | def __init__(self, rewardFunction, draws=1): 41 | arm = Binomial(0, draws) 42 | super(RestlessBinomial, self).__init__(rewardFunction, arm) 43 | 44 | 45 | class RestlessConstant(RestlessArm): 46 | def __init__(self, rewardFunction): 47 | arm = Constant(0) 48 | super(RestlessConstant, self).__init__(rewardFunction, arm) 49 | 50 | 51 | class RestlessExponential(RestlessArm): 52 | def __init__(self, rewardFunction): 53 | arm = UnboundedExponential(1) 54 | super(RestlessExponential, self).__init__(rewardFunction, arm) 55 | 56 | 57 | class RestlessGaussian(RestlessArm): 58 | def __init__(self, rewardFunction, sigma=1): 59 | arm = UnboundedGaussian(0, sigma) 60 | super(RestlessGaussian, self).__init__(rewardFunction, arm) 61 | 62 | 63 | class RestlessPoisson(RestlessArm): 64 | def __init__(self, rewardFunction, sigma=1): 65 | arm = UnboundedPoisson(0) 66 | super(RestlessPoisson, self).__init__(rewardFunction, arm) 67 | 68 | 69 | 70 | if __name__ == '__main__': 71 | restless_bernoulli = RestlessBernoulli(lambda x :sin(x)**2) 72 | restless_gaussian = RestlessGaussian(lambda x :sin(x)**2) 73 | restless_binomial = RestlessBinomial(lambda x :sin(x)**2, draws=10) 74 | print([sin(t)**2 for t in range(50)]) 75 | print([restless_gaussian.draw(t) for t in range(50)]) 76 | print([restless_bernoulli.draw(t) for t in range(50)]) 77 | print([restless_binomial.draw(t) for t in range(50)]) 78 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/kullback.py: -------------------------------------------------------------------------------- 1 | ../Policies/kullback.py -------------------------------------------------------------------------------- /SMPyBandits/Arms/usenumba.py: -------------------------------------------------------------------------------- 1 | ../Policies/usenumba.py -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/.gitignore: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | SHELL=/usr/bin/env /bin/bash 3 | 4 | test2: 5 | gcc -o test `python2.7-config --cflags` test.c `python2.7-config --ldflags` 6 | 7 | test3: 8 | gcc -o test `python3.5-config --cflags` test.c `python3.5-config --ldflags` 9 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/README.md: -------------------------------------------------------------------------------- 1 | # C_Interface 2 | 3 | This folder contains some experiments to create a C++ binding from my Python framework. 4 | 5 | TL;DR: so far, it failed. I stopped trying. 
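One of the approaches tried below ([`test_sub.cpp`](test_sub.cpp)) simply drives a Python interpreter through a pipe and sends it plain Python commands. A rough Python-side sketch of the same protocol, useful for testing the commands before writing any C++ (it assumes the `SMPyBandits` package is installed for the `python3` found on the PATH):

```python
import subprocess

# Launch a Python interpreter and feed it commands on stdin, as test_sub.cpp does with cpp-subprocess.
proc = subprocess.Popen(["python3"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
commands = "\n".join([
    "from SMPyBandits.Policies import UCB",
    "policy = UCB(10)",        # a policy for 10 arms
    "policy.startGame()",
    "print(policy.choice())",  # ask the policy for an arm and print it on stdout
]) + "\n"
out, _ = proc.communicate(commands)
print("Arm chosen by the remote policy:", out.strip())
```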
6 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/Sample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | """ Test module to be called from C++""" 3 | 4 | # from __future__ import print_function 5 | 6 | def add(a, b): 7 | """ Returns the sum of two numbers.""" 8 | a, b = int(a), int(b) 9 | c = str(a + b) 10 | print("a = {} and b = {} and a + b = {}".format(a, b, c)) 11 | return c 12 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/test.cpp: -------------------------------------------------------------------------------- 1 | #include "iostream" 2 | #include "Python.h" 3 | 4 | 5 | int main(int argc, char* argv[]) { 6 | printf("Calling Python to find the sum of 2 and 2.\n"); 7 | 8 | // Initialize the Python interpreter. 9 | Py_Initialize(); 10 | 11 | // Create some Python objects that will later be assigned values. 12 | PyObject *pName, *pModule, *pDict, *pFunc, *pArgs, *pValue; 13 | 14 | // Convert the file name to a Python string. 15 | pName = PyString_FromString("Sample"); 16 | 17 | // Import the file as a Python module. 18 | pModule = PyImport_Import(pName); 19 | 20 | // Create a dictionary for the contents of the module. 21 | pDict = PyModule_GetDict(pModule); 22 | 23 | // Get the add method from the dictionary. 24 | pFunc = PyDict_GetItemString(pDict, "add"); 25 | 26 | // Create a Python tuple to hold the arguments to the method. 27 | pArgs = PyTuple_New(2); 28 | 29 | // Convert 2 to a Python integer. 30 | pValue = PyInt_FromLong(2); 31 | 32 | // Set the Python int as the first and second arguments to the method. 33 | PyTuple_SetItem(pArgs, 0, pValue); 34 | PyTuple_SetItem(pArgs, 1, pValue); 35 | 36 | // Call the function with the arguments. 37 | PyObject* pResult = PyObject_CallObject(pFunc, pArgs); 38 | 39 | // Print a message if calling the method failed. 40 | if (pResult == NULL) { 41 | printf("Calling the add method failed.\n"); 42 | } 43 | 44 | // Convert the result to a long from a Python object. 45 | long result = PyInt_AsLong(pResult); 46 | 47 | // Destroy the Python interpreter. 48 | Py_Finalize(); 49 | 50 | // Print the result. 
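    // Note: 'result' has type long, so the matching "%ld" conversion specifier is used in the printf below.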
51 | printf("The result is %d.\n", result); 52 | std::cin.ignore(); 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/test.py: -------------------------------------------------------------------------------- 1 | from Policies import * 2 | 3 | policy = UCB(10) 4 | print(policy) 5 | 6 | def choice(): 7 | result = policy.choice() 8 | return result 9 | 10 | def getReward(arm, reward): 11 | result = policy.getReward(arm, reward) 12 | return result 13 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/test2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace boost::python; 6 | 7 | int main(int, char **) { 8 | Py_Initialize(); 9 | 10 | try { 11 | object module = import("__main__"); 12 | object name_space = module.attr("__dict__"); 13 | exec_file("test.py", name_space, name_space); 14 | 15 | object choice = name_space["choice"]; 16 | object result = choice(); 17 | // result is a dictionary 18 | std::string val = extract(result["val"]); 19 | std::cout << val << std::endl; 20 | } 21 | catch (error_already_set) { 22 | PyErr_Print(); 23 | } 24 | 25 | Py_Finalize(); 26 | return 0; 27 | } -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/test_sub.cpp: -------------------------------------------------------------------------------- 1 | // #! g++ -std=c++11 -Iinclude -o test_sub.exe test_sub.cpp -pthread 2 | /** 3 | Test of https://github.com/arun11299/cpp-subprocess 4 | 5 | - Author: Lilian Besson 6 | - License: MIT License (https://lbesson.mit-license.org/) 7 | - Date: 09-08-2017 8 | - Online: https://smpybandits.github.io/ 9 | - Reference: https://github.com/arun11299/cpp-subprocess 10 | */ 11 | 12 | // Include libraries 13 | #include // streams, <<, >> 14 | #include // strlen 15 | #include 16 | #include "subprocess.hpp" // From https://github.com/arun11299/cpp-subprocess 17 | 18 | // Macros to send a message 19 | #define send(msg) p.send(msg, strlen(msg)) 20 | #define communicate(msg) p.communicate(msg, strlen(msg)) 21 | 22 | 23 | int main() { 24 | namespace sp = subprocess; 25 | 26 | // auto p = sp::Popen({"python3"}, sp::input{sp::PIPE}); 27 | auto p = sp::Popen({"python3"}, sp::input{sp::PIPE}, sp::output{sp::PIPE}); 28 | auto input = p.input(); 29 | auto output = p.output(); 30 | 31 | // Import all the policies 32 | send("from Policies import *\n"); 33 | // std::cout << output.buf.data() << std::endl; 34 | 35 | // Create the policy 36 | send("policy = UCBalpha(10, alpha=0.5)\n"); 37 | // std::cout << output.buf.data() << std::endl; 38 | 39 | // Print it 40 | send("print(policy)\n"); 41 | // std::cout << output.buf.data() << std::endl; 42 | 43 | // Print it 44 | send("print(policy)\n"); 45 | // std::cout << output.buf.data() << std::endl; 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/MAB_rotting.py: -------------------------------------------------------------------------------- 1 | """ 2 | author : Julien SEZNEC 3 | Code to launch (rotting) bandit games. 4 | It is code in a functional programming way : each execution return arrays related to each run. 
5 | """ 6 | 7 | import time 8 | import numpy as np 9 | import logging 10 | from joblib import Parallel, delayed 11 | 12 | REPETITIONS = 1000 13 | HORIZON = 10000 14 | 15 | def repetedRuns(policy, arms, rep = REPETITIONS, T = HORIZON, parallel = True, oracle = False): 16 | rew = np.empty(shape = (rep, T)) 17 | noisy_rew = np.empty(shape = (rep, T)) 18 | time = np.empty(shape = (rep, T)) 19 | pulls = np.empty(shape=(rep, T)) 20 | cumul_pulls = np.empty(shape=(rep, len(arms))) 21 | if parallel: 22 | res = Parallel(n_jobs=parallel)(delayed(singleRun)(policy,arms, T, r, oracle) for r in range(rep)) 23 | else: 24 | res = [singleRun(policy,arms, T=T) for _ in range(rep)] 25 | rew[:, :] = np.array([r['cumul'] for r in res ]) 26 | noisy_rew[:, :] = np.array([r['noisy_cumul'] for r in res]) 27 | time[:, :] = np.array([r['time'] for r in res ]) 28 | pulls[:,:] = np.array([r['pulls'] for r in res ]) 29 | cumul_pulls[:,:] = np.array([r['cumul_pulls'] for r in res ]) 30 | return rew, noisy_rew, time, pulls, cumul_pulls 31 | 32 | def singleRun(policy, arms, T = HORIZON,rep_index = 0, oracle=False): 33 | myArms = [arm[0](**arm[1]) for arm in arms] 34 | if oracle: 35 | policy[1]['arms'] = myArms 36 | myPolicy = policy[0](len(myArms), **policy[1]) 37 | myPolicy.startGame() 38 | logging.debug(str(rep_index) + ' ' + myPolicy.__str__()) 39 | res = play(myArms, myPolicy, T, Oracle=oracle) 40 | return { 41 | 'cumul': np.array(res['rewards']).cumsum(), 42 | 'noisy_cumul': np.array(res['noisy_rewards']), 43 | 'time' : np.array(res['time']), 44 | 'pulls' : np.array(res['pulls']), 45 | 'cumul_pulls' : np.array(res['cumul_pulls']) 46 | } 47 | 48 | 49 | def play(arms, policy, T, Oracle= False): 50 | noisy_rewards = [] 51 | rewards = [] 52 | times = [] 53 | pulls = [] 54 | cumul_pulls = [0 for _ in range(len(arms))] 55 | for t in range(T): 56 | start = time.time() 57 | choice = policy.choice() 58 | reward = arms[choice].mean 59 | noisy_reward = arms[choice].draw(t) if not Oracle else arms[choice].oracle_draw(t) 60 | policy.getReward(choice, noisy_reward) 61 | times.append(time.time() - start) 62 | noisy_rewards.append(noisy_reward) 63 | rewards.append(reward) 64 | pulls.append(choice) 65 | cumul_pulls[choice] += 1 66 | return {'rewards': rewards, 'noisy_rewards': noisy_rewards, 'time': times, 'pulls': pulls, 'cumul_pulls' : cumul_pulls} -------------------------------------------------------------------------------- /SMPyBandits/Environment/README.md: -------------------------------------------------------------------------------- 1 | # [Environments](https://smpybandits.github.io/docs/Environment.html) 2 | > See here the documentation: [docs/Environment](https://smpybandits.github.io/docs/Environment.html) 3 | 4 | - [`MAB`](MAB.py), [`MarkovianMAB`](MarkovianMAB.py), [`DynamicMAB`](DynamicMAB.py) and [`IncreasingMAB`](IncreasingMAB.py) objects, used to wrap the problems (list of arms). 5 | - [`Result`](Result.py) and [`ResultMultiPlayers`](ResultMultiPlayers.py) objects, used to wrap simulation results (list of decisions and rewards). 6 | - [`Evaluator`](Evaluator.py) environment, used to wrap simulation, for the single player case. 7 | - [`EvaluatorMultiPlayers`](EvaluatorMultiPlayers.py) environment, used to wrap simulation, for the multi-players case. 8 | - [`EvaluatorSparseMultiPlayers`](EvaluatorSparseMultiPlayers.py) environment, used to wrap simulation, for the multi-players case with sparse activated players. 9 | - [`CollisionModels`](CollisionModels.py) implements different collision models. 
10 | 11 | And useful constants and functions for the plotting and stuff are in the [`__init__.py`](__init__.py) file: 12 | 13 | - `DPI`, `signature`, `maximizeWindow`, `palette`, `makemarkers`, `wraptext`: for plotting 14 | - `notify`: send a desktop notification 15 | - `Parallel`, `delayed`: joblib related 16 | - `tqdm`: pretty range() loops 17 | - `sortedDistance`, `fairnessMeasures`: science related -------------------------------------------------------------------------------- /SMPyBandits/Environment/Result.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Result.Result class to wrap the simulation results.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.9" 7 | 8 | import numpy as np 9 | 10 | 11 | class Result(object): 12 | """ Result accumulators.""" 13 | 14 | # , delta_t_save=1): 15 | def __init__(self, nbArms, horizon, indexes_bestarm=-1, means=None): 16 | """ Create ResultMultiPlayers.""" 17 | # self._means = means # Keep the means for ChangingAtEachRepMAB cases 18 | # self.delta_t_save = delta_t_save #: Sample rate for saving. 19 | self.choices = np.zeros(horizon, dtype=int) #: Store all the choices. 20 | self.rewards = np.zeros(horizon) #: Store all the rewards, to compute the mean. 21 | self.pulls = np.zeros(nbArms, dtype=int) #: Store the pulls. 22 | if means is not None: 23 | indexes_bestarm = np.nonzero(np.isclose(means, np.max(means)))[0] 24 | indexes_bestarm = np.asarray(indexes_bestarm) 25 | if np.size(indexes_bestarm) == 1: 26 | indexes_bestarm = np.asarray([indexes_bestarm]) 27 | self.indexes_bestarm = [ indexes_bestarm for _ in range(horizon)] #: Store also the position of the best arm, XXX in case of dynamically switching environment. 28 | self.running_time = -1 #: Store the running time of the experiment. 29 | self.memory_consumption = -1 #: Store the memory consumption of the experiment. 30 | self.number_of_cp_detections = 0 #: Store the number of change point detected during the experiment. 31 | 32 | def store(self, time, choice, reward): 33 | """ Store results.""" 34 | self.choices[time] = choice 35 | self.rewards[time] = reward 36 | self.pulls[choice] += 1 37 | 38 | def change_in_arms(self, time, indexes_bestarm): 39 | """ Store the position of the best arm from this list of arm. 40 | 41 | - From that time t **and after**, the index of the best arm is stored as ``indexes_bestarm``. 42 | 43 | .. warning:: FIXME This is still experimental! 44 | """ 45 | for t in range(time, len(self.indexes_bestarm)): 46 | self.indexes_bestarm[t] = indexes_bestarm 47 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/ResultMultiPlayers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ ResultMultiPlayers.ResultMultiPlayers class to wrap the simulation results, for the multi-players case.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.9" 7 | 8 | import numpy as np 9 | 10 | 11 | class ResultMultiPlayers(object): 12 | """ ResultMultiPlayers accumulators, for the multi-players case. 
""" 13 | 14 | # , delta_t_save=1 15 | def __init__(self, nbArms, horizon, nbPlayers, means=None): 16 | """ Create ResultMultiPlayers.""" 17 | # self._means = means # Keep the means for ChangingAtEachRepMAB cases 18 | self.choices = np.zeros((nbPlayers, horizon), dtype=int) #: Store all the choices of all the players 19 | self.rewards = np.zeros((nbPlayers, horizon)) #: Store all the rewards of all the players, to compute the mean 20 | # self.rewardsSquared = np.zeros((nbPlayers, horizon)) #: Store all the rewards**2 of all the players, to compute the variance # XXX uncomment if needed 21 | self.pulls = np.zeros((nbPlayers, nbArms), dtype=int) #: Store the pulls of all the players 22 | self.allPulls = np.zeros((nbPlayers, nbArms, horizon), dtype=int) #: Store all the pulls of all the players 23 | self.collisions = np.zeros((nbArms, horizon), dtype=int) #: Store the collisions on all the arms 24 | self.running_time = -1 #: Store the running time of the experiment 25 | self.memory_consumption = -1 #: Store the memory consumption of the experiment 26 | 27 | def store(self, time, choices, rewards, pulls, collisions): 28 | """ Store results.""" 29 | self.choices[:, time] = choices 30 | self.rewards[:, time] = rewards 31 | # self.rewardsSquared[:, time] = rewards ** 2 # XXX uncomment if needed 32 | self.pulls += pulls 33 | self.allPulls[:, :, time] = pulls 34 | self.collisions[:, time] = collisions 35 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/StrategicResult.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Result.Result class to wrap the simulation results.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "SlyJabiru" 6 | __version__ = "0.1" 7 | 8 | import numpy as np 9 | 10 | 11 | class StrategicResult(object): 12 | """ Result accumulators.""" 13 | 14 | # , delta_t_save=1): 15 | def __init__(self, nbArms, horizon, nbArmsPerAgents, means, bestArmMean, 16 | indexes_bestarm=-1): 17 | """ Create ResultMultiPlayers.""" 18 | # self._means = means # Keep the means for ChangingAtEachRepMAB cases 19 | # self.delta_t_save = delta_t_save #: Sample rate for saving. 20 | self.means = means 21 | self.bestArmMean = bestArmMean 22 | 23 | self.choices = np.zeros(horizon, dtype=int) #: Store all the choices. 24 | self.rewards = np.zeros(horizon) #: Store all the rewards, to compute the mean. 25 | self.pulls = np.zeros(nbArms, dtype=int) #: Store the pulls. 26 | self.instantRegrets = np.zeros(horizon) 27 | 28 | self.nbArmsPerAgents = nbArmsPerAgents 29 | self.agentChoices = np.zeros(horizon, dtype=int) 30 | self.agentChosenNb = np.zeros(len(nbArmsPerAgents), dtype=int) 31 | 32 | self.rewardsPerArms = np.zeros(nbArms) 33 | self.rewardsPerAgents = np.zeros(len(nbArmsPerAgents)) 34 | 35 | # if means is not None: 36 | # indexes_bestarm = np.nonzero(np.isclose(means, np.max(means)))[0] 37 | # indexes_bestarm = np.asarray(indexes_bestarm) 38 | # if np.size(indexes_bestarm) == 1: 39 | # indexes_bestarm = np.asarray([indexes_bestarm]) 40 | # self.indexes_bestarm = [ indexes_bestarm for _ in range(horizon)] #: Store also the position of the best arm, XXX in case of dynamically switching environment. 41 | # self.running_time = -1 #: Store the running time of the experiment. 42 | # self.memory_consumption = -1 #: Store the memory consumption of the experiment. 
43 | # self.number_of_cp_detections = 0 #: Store the number of change point detected during the experiment. 44 | 45 | def store(self, time, choice, reward): 46 | """ Store results.""" 47 | self.choices[time] = choice # Which arm was pulled? 48 | self.rewards[time] = reward 49 | self.pulls[choice] += 1 # How many times has each arm been pulled? 50 | self.instantRegrets[time] = self.bestArmMean - self.means[choice] 51 | 52 | armPossession = np.cumsum(self.nbArmsPerAgents) - 1 53 | temp = (armPossession >= choice) 54 | agent = np.where(temp)[0][0] 55 | 56 | self.agentChoices[time] = agent 57 | self.agentChosenNb[agent] += 1 58 | 59 | # Accumulate the per-agent rewards here, 60 | # so that the strategic evaluator can simply read them. 61 | self.rewardsPerArms[choice] += reward 62 | self.rewardsPerAgents[agent] += reward 63 | 64 | 65 | # def change_in_arms(self, time, indexes_bestarm): 66 | # """ Store the position of the best arm from this list of arm. 67 | 68 | # - From that time t **and after**, the index of the best arm is stored as ``indexes_bestarm``. 69 | 70 | # .. warning:: FIXME This is still experimental! 71 | # """ 72 | # for t in range(time, len(self.indexes_bestarm)): 73 | # self.indexes_bestarm[t] = indexes_bestarm 74 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ ``Environment`` module: 3 | 4 | - :class:`MAB`, :class:`MarkovianMAB`, :class:`ChangingAtEachRepMAB`, :class:`IncreasingMAB`, :class:`PieceWiseStationaryMAB`, :class:`NonStationaryMAB` objects, used to wrap the problems (essentially a list of arms). 5 | - :class:`Result` and :class:`ResultMultiPlayers` objects, used to wrap simulation results (list of decisions and rewards). 6 | - :class:`Evaluator` environment, used to wrap simulation, for the single player case. 7 | - :class:`EvaluatorMultiPlayers` environment, used to wrap simulation, for the multi-players case. 8 | - :class:`EvaluatorSparseMultiPlayers` environment, used to wrap simulation, for the multi-players case with sparse activated players. 9 | - :mod:`CollisionModels` implements different collision models. 10 | 11 | And useful constants and functions for the plotting and stuff: 12 | 13 | - :data:`DPI`, :func:`signature`, :func:`maximizeWindow`, :func:`palette`, :func:`makemarkers`, :func:`wraptext`: for plotting, 14 | - :func:`notify`: send a desktop notification, 15 | - :func:`Parallel`, :func:`delayed`: joblib related, 16 | - :mod:`tqdm`: pretty range() loops, 17 | - :mod:`sortedDistance`, :mod:`fairnessMeasures`: science related, 18 | - :func:`getCurrentMemory`, :func:`sizeof_fmt`: to measure and pretty print memory consumption. 
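These re-exports degrade gracefully when the optional dependencies are missing; for instance (a small sketch, the function is only an example):

    from SMPyBandits.Environment import Parallel, delayed, tqdm

    def square(x):
        return x ** 2

    # With joblib installed this runs in 2 processes; otherwise the fallback simply evaluates the generator.
    results = Parallel(n_jobs=2)(delayed(square)(i) for i in tqdm(range(100)))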
19 | """ 20 | from __future__ import division, print_function # Python 2 compatibility 21 | 22 | __author__ = "Lilian Besson" 23 | __version__ = "0.9" 24 | 25 | from .MAB import MAB, MarkovianMAB, ChangingAtEachRepMAB, IncreasingMAB, PieceWiseStationaryMAB, NonStationaryMAB 26 | 27 | from .Result import Result 28 | from .Evaluator import Evaluator 29 | from .StrategicEvaluator import StrategicEvaluator 30 | 31 | from .CollisionModels import * 32 | from .ResultMultiPlayers import ResultMultiPlayers 33 | from .EvaluatorMultiPlayers import EvaluatorMultiPlayers 34 | from .EvaluatorSparseMultiPlayers import EvaluatorSparseMultiPlayers 35 | 36 | from .plotsettings import DPI, signature, maximizeWindow, palette, makemarkers, wraptext 37 | 38 | from .notify import notify 39 | 40 | from .usejoblib import USE_JOBLIB, Parallel, delayed 41 | from .usetqdm import USE_TQDM, tqdm 42 | 43 | from .sortedDistance import weightedDistance, manhattan, kendalltau, spearmanr, gestalt, meanDistance, sortedDistance 44 | from .fairnessMeasures import amplitude_fairness, std_fairness, rajjain_fairness, mo_walrand_fairness, mean_fairness, fairnessMeasure, fairness_mapping 45 | 46 | from .memory_consumption import getCurrentMemory, sizeof_fmt, start_tracemalloc, display_top_tracemalloc 47 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/plot_Cmu_HOI.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Plot the C(mu) Lai & Robbins term and the HOI(mu) OI factor for various Bernoulli MAB problem.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.6" 7 | 8 | from itertools import product 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | 12 | # Local imports 13 | from sys import path; path.insert(0, '..') 14 | try: 15 | from .usenumba import jit 16 | from .usetqdm import tqdm 17 | from .plotsettings import maximizeWindow, legend 18 | from .Arms import * 19 | except ImportError: 20 | from usenumba import jit 21 | from usetqdm import tqdm 22 | from plotsettings import maximizeWindow, legend 23 | from Arms import * 24 | 25 | oneLR = Bernoulli.oneLR 26 | oneHOI = Bernoulli.oneHOI 27 | 28 | 29 | @jit 30 | def cmu(mu): 31 | """One LR term for Bernoulli problems.""" 32 | best = max(mu) 33 | return sum(oneLR(best, m) for m in mu if m != best) 34 | 35 | 36 | @jit 37 | def oi(mu): 38 | """One HOI term for Bernoulli problems.""" 39 | best = max(mu) 40 | return sum(oneHOI(best, m) for m in mu if m != best) / float(len(mu)) 41 | 42 | 43 | def addit(c, o, mu): 44 | """Add cmu(mu) to c and o(mu) to c if mu are not all equal.""" 45 | if len(set(mu)) > 1: 46 | c.append(cmu(mu)) 47 | o.append(oi(mu)) 48 | 49 | 50 | def main(K, N=50000, T=10): 51 | """Plot.""" 52 | print("Starting for K =", K) 53 | 54 | c1, o1 = [], [] 55 | for _ in tqdm(range(N), desc="Uniformly random (%d)" % N): 56 | mu = np.random.random(K) 57 | addit(c1, o1, mu) 58 | print("c: min =", min(c1), "max =", max(c1)) 59 | print("o: min =", min(o1), "max =", max(o1)) 60 | 61 | c2, o2 = [], [] 62 | for _ in tqdm(range(N), desc="Gaussian (%d)" % N): 63 | mu = np.minimum(1, np.maximum(0, np.random.normal(loc=0.5, scale=0.2, size=K))) 64 | addit(c2, o2, mu) 65 | print("c: min =", min(c2), "max =", max(c2)) 66 | print("o: min =", min(o2), "max =", max(o2)) 67 | 68 | c3, o3 = [], [] 69 | for mu in tqdm(product(np.linspace(0, 1, T), repeat=K), desc="Evenly 
spaced (%d)" % (T**K)): 70 | addit(c3, o3, mu) 71 | print("c: min =", min(c3), "max =", max(c3)) 72 | print("o: min =", min(o3), "max =", max(o3)) 73 | 74 | # for method in [plt.plot, plt.semilogx]: 75 | for method in [plt.semilogx]: 76 | plt.figure() 77 | method(c1, o1, 'o', ms=2, label="Uniform") 78 | method(c2, o2, 'x', ms=2, label="Gaussian") 79 | method(c3, o3, 'd', ms=2, label="Evenly spaced") 80 | legend() 81 | plt.xlabel(r"Lai & Robbins complexity constant, $C_{\mu}$") 82 | plt.ylabel(r"Navikkumar Modi HOI factor, $H_{OI}(\mu)$") 83 | plt.title("Comparison of two complexity criteria, for Bernoulli MAB problems, with $K = {}$ arms.".format(K)) 84 | maximizeWindow() 85 | plt.show() 86 | 87 | 88 | if __name__ == '__main__': 89 | for K in [3, 5, 7]: 90 | main(K) 91 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/usejoblib.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Import Parallel and delayed from joblib, safely. 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.9" 8 | 9 | try: 10 | from joblib import Parallel, delayed 11 | USE_JOBLIB = True 12 | except ImportError: 13 | print("Warning: joblib not found. Install it from pypi ('pip install joblib') or conda.\n Info: Not mandatory, but it improves computation speed on multi-core machines.") 14 | USE_JOBLIB = False 15 | 16 | # In case the code uses Parallel and delayed, even if USE_JOBLIB is False 17 | def Parallel(*args, **kwargs): 18 | """Fake joblib.Parallel implementation.""" 19 | def fakeParallelWrapper(iterator): 20 | """ Just a list(iterator).""" 21 | return list(iterator) 22 | return fakeParallelWrapper 23 | 24 | def delayed(f, *args, **kwargs): 25 | """Fake joblib.delayed implementation.""" 26 | return f 27 | 28 | 29 | # Only export and expose the useful functions defined here 30 | __all__ = [ 31 | "USE_JOBLIB", 32 | "Parallel", 33 | "delayed" 34 | ] 35 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/usenumba.py: -------------------------------------------------------------------------------- 1 | ../Policies/usenumba.py -------------------------------------------------------------------------------- /SMPyBandits/Environment/usetqdm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Import tqdm from tqdm, safely. 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.9" 8 | 9 | 10 | def in_notebook(): 11 | """Check if the code is running inside a Jupyter notebook or not. Cf. http://stackoverflow.com/a/39662359/. 12 | 13 | >>> in_notebook() 14 | False 15 | """ 16 | try: 17 | shell = get_ipython().__class__.__name__ 18 | if shell == 'ZMQInteractiveShell': # Jupyter notebook or qtconsole? 19 | return True 20 | elif shell == 'TerminalInteractiveShell': # Terminal running IPython? 21 | return False 22 | else: 23 | return False # Other type (?) 
24 | except NameError: 25 | return False # Probably standard Python interpreter 26 | 27 | 28 | try: 29 | if in_notebook(): 30 | from tqdm.notebook import tqdm 31 | print("Info: Using the Jupyter notebook version of the tqdm() decorator, tqdm_notebook() ...") # DEBUG 32 | else: 33 | from tqdm import tqdm 34 | # print("Info: Using the regular tqdm() decorator ...") # DEBUG 35 | USE_TQDM = True 36 | except ImportError: 37 | print("Warning: tqdm not found. Install it from pypi ('pip install tqdm') or conda.\n Info: Not mandatory, but it's pretty!") 38 | USE_TQDM = False 39 | 40 | def tqdm(iterator, *args, **kwargs): 41 | """Fake tqdm.tqdm wrapper, ignore **kwargs like desc='...', and return iterator.""" 42 | return iterator 43 | 44 | 45 | # Only export and expose the useful functions defined here 46 | __all__ = [ 47 | "USE_TQDM", 48 | "tqdm", 49 | ] 50 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig1/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig1/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig2/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig2/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig3/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig3/plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien SEZNEC 3 | Plot utility to reproduce Figure 3 of [Seznec et al., 2019a] 4 | Reference: [Seznec et al., 2019a] 5 | Rotting bandits are not harder than stochastic ones; 6 | Julien Seznec, Andrea Locatelli, Alexandra Carpentier, Alessandro Lazaric, Michal Valko ; 7 | Proceedings of Machine Learning Research, PMLR 89:2564-2572, 2019. 
8 | http://proceedings.mlr.press/v89/seznec19a.html 9 | https://arxiv.org/abs/1811.11043 (updated version) 10 | """ 11 | from matplotlib import pyplot as plt 12 | from SMPyBandits.Policies import FEWA, UCB 13 | import os 14 | import numpy as np 15 | 16 | plt.style.use('seaborn-colorblind') 17 | plt.style.use('style.mplstyle') 18 | 19 | 20 | 21 | def fig3(data, delta , name='fig3A.pdf', ylim=300): 22 | # -------------- PLOT -------------- 23 | fig, ax = plt.subplots(figsize=(12, 10)) 24 | for i, policy in enumerate(data): 25 | print(data[policy]["mean"]) 26 | X = range(data[policy]["mean"].shape[0]) 27 | ax.plot(X, data[policy]["mean"], label=policy, linewidth=3) 28 | color = ax.get_lines()[-1].get_c() 29 | ax.plot(X, data[policy]["uppq"], label=None, linestyle='--', color=color, linewidth=1) 30 | ax.plot(X, data[policy]["lowq"], label=None, linestyle='--', color=color, linewidth=1) 31 | plt.fill_between(X, data[policy]["uppq"], data[policy]["lowq"], alpha=.05, color=color) 32 | plt.xlim(0,5000) 33 | plt.ylim(0, ylim) 34 | plt.legend(prop={'variant': 'small-caps'}) 35 | plt.xlabel('Round ($t$)') 36 | plt.ylabel('Average regret $R_t$') 37 | ax.xaxis.set_label_coords(0.5, -0.08) 38 | ax.yaxis.set_label_coords(-0.09, 0.5) 39 | plt.title('$\Delta = {:.3g}$'.format(delta), y=1.04) 40 | # -------------- SAVE -------------- 41 | plt.savefig(name) 42 | 43 | 44 | if __name__ == "__main__": 45 | for game in range(1,3): 46 | policies = [ 47 | [FEWA, {'alpha': .01, 'delta': 1, 'subgaussian': 1}], 48 | [FEWA, {'alpha': .06, 'delta': 1, 'subgaussian': 1}], 49 | [FEWA, {'alpha': 0.25, 'delta': 1, 'subgaussian': 1}], 50 | [UCB, {}] 51 | ] 52 | data = {} 53 | for policy in policies: 54 | policy_name = str(policy[0](nbArms=2, **policy[1])) 55 | policy_name_nospace = policy_name.replace(' ', '_') 56 | policy_data = [ 57 | np.load(os.path.join('./data', file)) for file in os.listdir('./data') if 58 | file.startswith("REGRET%s_"%game + policy_name_nospace) 59 | ] 60 | if not policy_data: 61 | continue 62 | policy_data_array = np.concatenate(policy_data, axis=0) 63 | print(len(policy_data), policy_data_array.shape) 64 | data[policy_name] = { 65 | "mean": policy_data_array.mean(axis=0), 66 | "uppq": np.quantile(policy_data_array, 0.9, axis=0), 67 | "lowq": np.quantile(policy_data_array, 0.1, axis=0) 68 | } 69 | 70 | fig3(data, delta=0.14 if game == 1 else 1, name='fig3%s.pdf'%game) 71 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig3/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19b_Fig1/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | Reward/ 3 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19b_Fig1/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | 
font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_EFF/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_EFF/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien SEZNEC 3 | Produce the experiment about the efficiency of EFF_RAWUCB 4 | For the thesis manuscript. 5 | """ 6 | 7 | from SMPyBandits.Arms import RestedRottingGaussian 8 | from SMPyBandits.Policies import GreedyOracle, RAWUCB, EFF_RAWUCB, wSWA 9 | from SMPyBandits.Environment.MAB_rotting import repetedRuns 10 | import numpy as np 11 | import datetime 12 | import os 13 | import logging 14 | import sys 15 | 16 | date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f") 17 | PARALLEL = -1 # Set positive int to indicate the number of core, -1 to use all the cores, and False to not parallelize 18 | REPETITIONS = 1 if len(sys.argv) < 3 else int(sys.argv[2]) # Set the number of repetitions 19 | HORIZON = T = 10**6 # Horizon T 20 | sigma = 1 # Gaussian noise std 21 | K = 2 22 | mu = 0.1 23 | 24 | ### SET Policies 25 | policies = [ 26 | [RAWUCB, {'alpha': 1.4}], # 0 27 | [EFF_RAWUCB, {'alpha': 1.4, 'm':2}], # 1 28 | [wSWA, {'alpha': 0.002}], # 2 29 | [wSWA, {'alpha': 0.02}], # 3 30 | [wSWA, {'alpha': 0.2}], # 4 31 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.01}], # 5 32 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.1}], # 6 33 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.2}], # 7 34 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.3}], # 8 35 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.5}], # 9 36 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.9}], # 10 37 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 2.1}], # 11 38 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 3}], # 12 39 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 10}], # 13 40 | ] 41 | policy_ind = 10 if len(sys.argv) == 1 else int(sys.argv[1]) 42 | policy = policies[policy_ind] 43 | policy_name = str(policy[0](nbArms=2, **policy[1])) 44 | policy_name_nospace = policy_name.replace(' ', '_') 45 | 46 | regret_path = os.path.join('./data', 'REGRET_' + policy_name_nospace + '_' + date) 47 | time_path = os.path.join('./data', 'TIME_' + policy_name_nospace + '_' + date) 48 | os.makedirs('./data/logging/', exist_ok=True) 49 | logging.basicConfig(filename=os.path.join('./data/logging', date + '.log'), level=logging.INFO, 50 | format='%(asctime)s %(message)s') 51 | logging.info("Policy : %s$" % (policy_name)) 52 | 53 | ### SET L/2 54 | logging.info("CONFIG : CPU %s" % os.cpu_count()) 55 | logging.info("CONFIG : REPETITIONS %s" % REPETITIONS) 56 | logging.info("CONFIG : HORIZON %s" % HORIZON) 57 | logging.info("CONFIG : SIGMA %s" % sigma) 58 | logging.info("CONFIG : $\mu = %s$" % mu) 59 | 60 | noisy_reward_res = [] 61 | regret_res = [] 62 | time_res = [] 63 | overpull_res = [] 64 | ### SET K arms 65 | arms = [ 66 | [ 67 | RestedRottingGaussian, 68 | {'decayingFunction': lambda n: mu if n <= HORIZON / 4 else -mu, 'sigma': sigma, } 69 | ], 70 | [ 71 | RestedRottingGaussian, 72 | {'decayingFunction': lambda n: 0, 'sigma': sigma, 
} 73 | ], 74 | ] 75 | rew, noisy_rew, time, pulls, cumul_pulls = repetedRuns(policy, arms, rep=REPETITIONS, T=HORIZON, parallel=PARALLEL) 76 | oracle_rew, noisy_oracle_rew, oracle_time, oracle_pull, oracle_cumul_pulls = repetedRuns( 77 | [GreedyOracle, {}], arms, rep=1, T=HORIZON, oracle=True 78 | ) 79 | regret = oracle_rew - rew 80 | regret_res.append(regret) 81 | time_res.append(time) 82 | logging.info("EVENT : SAVING ... ") 83 | np.save(regret_path, np.array(regret_res)) 84 | np.save(time_path, np.array(time_res)) 85 | logging.info("EVENT : END ... ") 86 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_EFF/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_asymptotic/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_asymptotic/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien SEZNEC 3 | Produce the experiment about the (potential) asymptotic optimality of RAW-UCB++ 4 | For the thesis manuscript. 
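    Usage sketch (added note, inferred from the argument parsing below and not part of the original docstring): run ``python main.py [policy_index] [repetitions]``, where ``policy_index`` selects one entry of the ``policies`` list (default 9) and ``repetitions`` sets ``REPETITIONS`` (default 1).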
5 | """ 6 | 7 | from SMPyBandits.Arms import RestedRottingGaussian, UnboundedGaussian as Gaussian 8 | from SMPyBandits.Policies import GreedyOracle, RAWUCB, EFF_RAWUCB, EFF_RAWUCB_pp, MOSSAnytime, UCB 9 | from SMPyBandits.Environment.MAB_rotting import repetedRuns 10 | import numpy as np 11 | import datetime 12 | import os 13 | import logging 14 | import sys 15 | 16 | date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f") 17 | PARALLEL = -1 # Set positive int to indicate the number of core, -1 to use all the cores, and False to not parallelize 18 | REPETITIONS = 1 if len(sys.argv) < 3 else int(sys.argv[2]) # Set the number of repetitions 19 | HORIZON = T = 10**6 # Horizon T 20 | sigma = 1 # Gaussian noise std 21 | K = 2 22 | 23 | ### SET Policies 24 | policies = [ 25 | [MOSSAnytime, {'alpha':3}], #0 26 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.01}], # 1 27 | [EFF_RAWUCB_pp, {'beta': 0, 'm': 1.01}], # 2 28 | [EFF_RAWUCB_pp, {'beta': 1, 'm': 1.01}], # 3 29 | [EFF_RAWUCB_pp, {'beta': 2, 'm': 1.01}], # 4 30 | [EFF_RAWUCB_pp, {'beta': 3, 'm': 1.01}], # 5 31 | [UCB, {}], #6 32 | [EFF_RAWUCB_pp, {'beta': 2.5, 'm': 1.01}], # 7 33 | [EFF_RAWUCB_pp, {'beta': 3.5, 'm': 1.01}], # 8 34 | [EFF_RAWUCB_pp, {'alpha': 1.3, 'm': 1.01}], # 9 35 | [EFF_RAWUCB_pp, {'alpha': 1.4, 'm': 1.01}], # 10 36 | [EFF_RAWUCB_pp, {'alpha': 1.5, 'm': 1.01}], # 11 37 | [EFF_RAWUCB_pp, {'alpha': 1.7, 'm': 1.01}], # 12 38 | ] 39 | policy_ind = 9 if len(sys.argv) == 1 else int(sys.argv[1]) 40 | policy = policies[policy_ind] 41 | policy_name = str(policy[0](nbArms=2, **policy[1])) 42 | policy_name_nospace = policy_name.replace(' ', '_') 43 | 44 | regret_path = os.path.join('./data', 'REGRET_' + policy_name_nospace + '_' + date) 45 | time_path = os.path.join('./data', 'TIME_' + policy_name_nospace + '_' + date) 46 | os.makedirs('./data/logging/', exist_ok=True) 47 | logging.basicConfig(filename=os.path.join('./data/logging', date + '.log'), level=logging.INFO, 48 | format='%(asctime)s %(message)s') 49 | logging.info("Policy : %s$" % (policy_name)) 50 | 51 | ### SET L/2 52 | mus = [0.01, 1] 53 | logging.info("CONFIG : CPU %s" % os.cpu_count()) 54 | logging.info("CONFIG : REPETITIONS %s" % REPETITIONS) 55 | logging.info("CONFIG : HORIZON %s" % HORIZON) 56 | logging.info("CONFIG : SIGMA %s" % sigma) 57 | 58 | noisy_reward_res = [] 59 | regret_res = [] 60 | time_res = [] 61 | overpull_res = [] 62 | for m, mu in enumerate(mus): 63 | logging.info("GAME %s : $\mu = %s$" % (m, mu)) 64 | print(mu) 65 | ### SET K arms 66 | arms = [ 67 | [Gaussian, {"mu":0, "sigma": sigma}], 68 | [Gaussian, {"mu":mu, "sigma": sigma}] 69 | ] 70 | rew, noisy_rew, time, pulls, cumul_pulls = repetedRuns(policy, arms, rep=REPETITIONS, T=HORIZON, parallel=PARALLEL) 71 | oracle_rew, noisy_oracle_rew, oracle_time, oracle_pull, oracle_cumul_pulls = repetedRuns( 72 | [GreedyOracle, {}], arms, rep=1, T=HORIZON, oracle=True 73 | ) 74 | regret = oracle_rew - rew 75 | regret_res.append(regret) 76 | # time_res.append(time) 77 | logging.info("EVENT : SAVING ... ") 78 | np.save(regret_path, np.array(regret_res)) 79 | logging.info("EVENT : END ... 
") 80 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_asymptotic/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2018 Lilian Besson (Naereen), https://GitHub.com/Naereen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/.gitignore: -------------------------------------------------------------------------------- 1 | # automatically generated with cython for kullback_cython.pyx 2 | kullback.c 3 | kullback_cython.c 4 | build/ 5 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/BayesUCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Bayes-UCB policy. 3 | 4 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 5 | - Reference: [Kaufmann, Cappé & Garivier - AISTATS, 2012] 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Olivier Cappé, Aurélien Garivier, Emilie Kaufmann, Lilian Besson" 10 | __version__ = "0.5" 11 | 12 | try: 13 | from .BayesianIndexPolicy import BayesianIndexPolicy 14 | except ImportError: 15 | from BayesianIndexPolicy import BayesianIndexPolicy 16 | 17 | 18 | class BayesUCB(BayesianIndexPolicy): 19 | """ The Bayes-UCB policy. 20 | 21 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 22 | -Reference: [Kaufmann, Cappé & Garivier - AISTATS, 2012]. 
23 | """ 24 | 25 | def computeIndex(self, arm): 26 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, giving :math:`S_k(t)` rewards of 1, by taking the :math:`1 - \frac{1}{t}` quantile from the Beta posterior: 27 | 28 | .. math:: I_k(t) = \mathrm{Quantile}\left(\mathrm{Beta}(1 + S_k(t), 1 + N_k(t) - S_k(t)), 1 - \frac{1}{t}\right). 29 | """ 30 | return self.posterior[arm].quantile(1. - 1. / (1 + self.t)) 31 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/BayesianIndexPolicy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Basic Bayesian index policy. By default, it uses a Beta posterior. """ 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.9" 7 | 8 | try: 9 | from .IndexPolicy import IndexPolicy 10 | from .Posterior import Beta 11 | except ImportError: 12 | from IndexPolicy import IndexPolicy 13 | from Posterior import Beta 14 | 15 | 16 | class BayesianIndexPolicy(IndexPolicy): 17 | """ Basic Bayesian index policy. 18 | 19 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 20 | - Use ``*args`` and ``**kwargs`` if you want to give parameters to the underlying posteriors. 21 | - Or use ``params_for_each_posterior`` as a *list* of parameters (as a dictionary) to give a different set of parameters for each posterior. 22 | """ 23 | 24 | def __init__(self, nbArms, 25 | posterior=Beta, 26 | lower=0., amplitude=1., 27 | *args, **kwargs 28 | ): 29 | """ Create a new Bayesian policy, by creating a default posterior on each arm.""" 30 | super(BayesianIndexPolicy, self).__init__(nbArms, lower=lower, amplitude=amplitude) 31 | self.posterior = [None] * nbArms #: Posterior for each arm. 
List instead of dict, quicker access 32 | if 'params_for_each_posterior' in kwargs: 33 | params = kwargs['params_for_each_posterior'] 34 | print("'params_for_each_posterior' is in kwargs, so using params =\n{}\nas a list of parameters to give to each posterior.".format(params)) # DEBUG 35 | for arm in range(self.nbArms): 36 | print("Creating posterior for arm {}, with params = {}.".format(arm, params[arm])) # DEBUG 37 | self.posterior[arm] = posterior(**params[arm]) 38 | else: 39 | for arm in range(self.nbArms): 40 | # print("Creating posterior for arm {}, with args = {} and kwargs = {}.".format(arm, args, kwargs)) # DEBUG 41 | self.posterior[arm] = posterior(*args, **kwargs) 42 | self._posterior_name = str(self.posterior[0].__class__.__name__) 43 | 44 | def __str__(self): 45 | """ -> str""" 46 | if self._posterior_name == "Beta": 47 | return "{}".format(self.__class__.__name__) 48 | else: 49 | return "{}({})".format(self.__class__.__name__, self._posterior_name) 50 | 51 | def startGame(self): 52 | """ Reset the posterior on each arm.""" 53 | self.t = 0 54 | for arm in range(self.nbArms): 55 | self.posterior[arm].reset() 56 | # print("Policy {} reinitialized with posteriors: {}".format(self, [str(p) for p in self.posterior])) # DEBUG 57 | 58 | def getReward(self, arm, reward): 59 | """ Update the posterior on each arm, with the normalized reward.""" 60 | self.posterior[arm].update((reward - self.lower) / self.amplitude) 61 | self.t += 1 62 | 63 | def computeIndex(self, arm): 64 | raise NotImplementedError("This method computeIndex(arm) has to be implemented in the child class inheriting from BayesianIndexPolicy.") 65 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/BoltzmannGumbel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Boltzmann-Gumbel Exploration (BGE) index policy, a different formulation of the :class:`Exp3` policy with an optimally tune decreasing sequence of temperature parameters :math:`\gamma_t`. 3 | 4 | - Reference: Section 4 of [Boltzmann Exploration Done Right, N.Cesa-Bianchi & C.Gentile & G.Lugosi & G.Neu, arXiv 2017](https://arxiv.org/pdf/1705.10257.pdf). 5 | - It is an index policy with indexes computed from the empirical mean estimators and a random sample from a Gumbel distribution. 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.6" 11 | 12 | import numpy as np 13 | import numpy.random as rn 14 | 15 | try: 16 | from .IndexPolicy import IndexPolicy 17 | except ImportError: 18 | from IndexPolicy import IndexPolicy 19 | 20 | 21 | #: Default constant :math:`\sigma` assuming the arm distributions are :math:`\sigma^2`-subgaussian. 1 for Bernoulli arms. 22 | SIGMA = 1 23 | 24 | class BoltzmannGumbel(IndexPolicy): 25 | r""" The Boltzmann-Gumbel Exploration (BGE) index policy, a different formulation of the :class:`Exp3` policy with an optimally tune decreasing sequence of temperature parameters :math:`\gamma_t`. 26 | 27 | - Reference: Section 4 of [Boltzmann Exploration Done Right, N.Cesa-Bianchi & C.Gentile & G.Lugosi & G.Neu, arXiv 2017](https://arxiv.org/pdf/1705.10257.pdf). 28 | - It is an index policy with indexes computed from the empirical mean estimators and a random sample from a Gumbel distribution. 
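    For instance (an illustrative note, not in the original docstring): with :math:`C = 1` and :math:`N_k(t) = 25` pulls, the exploration scale is :math:`\beta_k(t) = \sqrt{1/25} = 0.2`, so the index is the empirical mean of arm k plus :math:`0.2` times a standard Gumbel draw.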
29 | """ 30 | 31 | def __init__(self, nbArms, C=SIGMA, lower=0., amplitude=1.): 32 | super(BoltzmannGumbel, self).__init__(nbArms, lower=lower, amplitude=amplitude) 33 | assert C > 0, "Error: the C parameter for BoltzmannGumbel class has to be > 0." 34 | self.C = C 35 | 36 | def __str__(self): 37 | return r"BoltzmannGumbel($\alpha={:.3g}$)".format(self.C) 38 | 39 | def computeIndex(self, arm): 40 | r""" Take a random index, at time t and after :math:`N_k(t)` pulls of arm k: 41 | 42 | .. math:: 43 | 44 | I_k(t) &= \frac{X_k(t)}{N_k(t)} + \beta_k(t) Z_k(t), \\ 45 | \text{where}\;\; \beta_k(t) &:= \sqrt{C^2 / N_k(t)}, \\ 46 | \text{and}\;\; Z_k(t) &\sim \mathrm{Gumbel}(0, 1). 47 | 48 | Where :math:`\mathrm{Gumbel}(0, 1)` is the standard Gumbel distribution. 49 | See [Numpy documentation](https://docs.scipy.org/doc/numpy/reference/generated/numpy.random.gumbel.html#numpy.random.gumbel) or [Wikipedia page](https://en.wikipedia.org/wiki/Gumbel_distribution) for more details. 50 | """ 51 | if self.pulls[arm] < 1: 52 | return float('+inf') 53 | else: 54 | beta_k_t = np.sqrt(self.C ** 2 / self.pulls[arm]) 55 | z_k_t = rn.gumbel(0, 1) 56 | return (self.rewards[arm] / self.pulls[arm]) + beta_k_t * z_k_t 57 | 58 | def computeAllIndex(self): 59 | """ Compute the current indexes for all arms, in a vectorized manner.""" 60 | beta_t = np.sqrt(self.C ** 2 / self.pulls) 61 | z_t = rn.gumbel(0, 1, self.nbArms) # vector samples 62 | indexes = (self.rewards / self.pulls) + beta_t * z_t 63 | indexes[self.pulls < 1] = float('+inf') 64 | self.index[:] = indexes 65 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | *.so 3 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/Makefile: -------------------------------------------------------------------------------- 1 | # GNU Make makefile to build the kullback C extension 2 | all: clean build install clean 3 | 4 | build: build2 build3 5 | 6 | build2: kullback.c setup.py 7 | python2 setup.py build 8 | 9 | build3: kullback_py3.c setup.py3 10 | python3 setup.py3 build 11 | 12 | install: 13 | \cp build/lib*/kullback.* ../ 14 | 15 | clean: setup.py 16 | python2 setup.py clean 17 | python3 setup.py3 clean 18 | #rm -rvf build/* 19 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/README.md: -------------------------------------------------------------------------------- 1 | # Fast C versions of the utilities in [`kullpack.py`](../kullback.py) 2 | 3 | ## Prefer the Cython version? 4 | WARNING: I have now written a Cython version of this module, see [`kullback_cython.pyx`](../kullback_cython.pyx). 5 | It has all the advantages of the C version (speed and memory efficiency), and all the advantages of the Python version (documentation, optional arguments). 6 | 7 | You can have a look to the first examples in [`kullback_cython.pyx`](../kullback_cython.pyx) to see a small comparison between the Cython and C versions. 8 | 9 | TL;DR: I don't recommend that you try using this C version, it's not worth it: the C version is only 2 times faster than the Cython one, and both are between 100 to 200 times faster than the naive Python versions! 10 | 11 | ### Requirements? 
12 | You need either the `cython` package for your version of Python (if you want to compile the [`kullback_cython.pyx`](../kullback_cython.pyx) file before running your extension), or the `cython` package together with its bundled `pyximport` module, if you want to be able to directly import the Cython version with: 13 | 14 | ```python 15 | >>> import pyximport; pyximport.install() 16 | >>> import kullback_cython as kullback 17 | >>> # then use kullback.klucbBern or others, as if they came from the pure Python version! 18 | ``` 19 | 20 | --- 21 | 22 | ## Build it 23 | To create the module use 24 | 25 | ```bash 26 | python setup.py build 27 | python3 setup.py build 28 | ``` 29 | 30 | Or simply use the provided [`Makefile`](Makefile): 31 | 32 | ```bash 33 | make build 34 | ``` 35 | 36 | The compiled module (`.so` file) will appear in `build/lib.???` (typically `yoursys-yourarch-yourversion`). 37 | 38 | ## Clean-up 39 | Temporary files in `build/temp.*` can be removed with 40 | 41 | ```bash 42 | python setup.py clean 43 | python3 setup.py clean 44 | ``` 45 | 46 | Or simply use the provided [`Makefile`](Makefile): 47 | 48 | ```bash 49 | make clean 50 | ``` 51 | 52 | ## Requirements 53 | Building requires the header files and static library, typically available in a package called `python-dev` (on Linux systems). 54 | See [the Python documentation](https://docs.python.org/3/c-api/) for more details. 55 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Utility for building the C library for Python 2.""" 3 | 4 | __author__ = "Olivier Cappé, Aurélien Garivier" 5 | __version__ = "$Revision: 1.3 $" 6 | 7 | from distutils.core import setup, Extension 8 | 9 | module1 = Extension('kullback', sources=['kullback.c']) 10 | 11 | 12 | setup(name='Kullback utilities', 13 | version='1.0', 14 | description='computes various KL divergences', 15 | ext_modules=[module1] 16 | ) 17 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/setup.py3: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Utility for building the C library for Python 3.""" 3 | 4 | __author__ = "Olivier Cappé, Aurélien Garivier" 5 | __version__ = "$Revision: 1.3 $" 6 | 7 | from distutils.core import setup, Extension 8 | 9 | module1 = Extension('kullback', sources=['kullback_py3.c']) 10 | 11 | 12 | setup(name='Kullback utilities', 13 | version='1.0', 14 | description='computes various KL divergences', 15 | ext_modules=[module1] 16 | ) 17 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/DiscountedBayesianIndexPolicy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Discounted Bayesian index policy. 3 | 4 | - By default, it uses a DiscountedBeta posterior (:class:`Policies.Posterior.DiscountedBeta`), one by arm. 5 | - Use discount factor :math:`\gamma\in(0,1)`. 6 | 7 | .. warning:: This is still highly experimental!
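    Illustration (added note, not in the original docstring): with :math:`\gamma = 0.95`, if arm :math:`A(t)` is pulled and yields reward :math:`r(t) = 1`, its discounted success count becomes :math:`\widetilde{S_{A(t)}}(t+1) = 0.95 \widetilde{S_{A(t)}}(t) + 1`, while the discounted counts of every other arm are simply multiplied by :math:`0.95` (see the update equations in the class docstring below).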
8 | """ 9 | from __future__ import division, print_function # Python 2 compatibility 10 | 11 | __author__ = "Lilian Besson" 12 | __version__ = "0.9" 13 | 14 | try: 15 | from .BayesianIndexPolicy import BayesianIndexPolicy 16 | from .Posterior import DiscountedBeta 17 | except ImportError: 18 | from BayesianIndexPolicy import BayesianIndexPolicy 19 | from Posterior import DiscountedBeta 20 | 21 | 22 | # --- Constants 23 | 24 | #: Default value for the discount factor :math:`\gamma\in(0,1)`. 25 | #: ``0.95`` is empirically a reasonable value for short-term non-stationary experiments. 26 | GAMMA = 0.95 27 | 28 | 29 | # --- Class 30 | 31 | class DiscountedBayesianIndexPolicy(BayesianIndexPolicy): 32 | r""" Discounted Bayesian index policy. 33 | 34 | - By default, it uses a DiscountedBeta posterior (:class:`Policies.Posterior.DiscountedBeta`), one by arm. 35 | - Use discount factor :math:`\gamma\in(0,1)`. 36 | 37 | - It keeps :math:`\widetilde{S_k}(t)` and :math:`\widetilde{F_k}(t)` the discounted counts of successes and failures (S and F), for each arm k. 38 | 39 | - But instead of using :math:`\widetilde{S_k}(t) = S_k(t)` and :math:`\widetilde{N_k}(t) = N_k(t)`, they are updated at each time step using the discount factor :math:`\gamma`: 40 | 41 | .. math:: 42 | 43 | \widetilde{S_{A(t)}}(t+1) &= \gamma \widetilde{S_{A(t)}}(t) + r(t),\\ 44 | \widetilde{S_{k'}}(t+1) &= \gamma \widetilde{S_{k'}}(t), \forall k' \neq A(t). 45 | 46 | .. math:: 47 | 48 | \widetilde{F_{A(t)}}(t+1) &= \gamma \widetilde{F_{A(t)}}(t) + (1 - r(t)),\\ 49 | \widetilde{F_{k'}}(t+1) &= \gamma \widetilde{F_{k'}}(t), \forall k' \neq A(t). 50 | """ 51 | 52 | def __init__(self, nbArms, 53 | gamma=GAMMA, posterior=DiscountedBeta, 54 | lower=0., amplitude=1., 55 | *args, **kwargs 56 | ): 57 | """ Create a new Bayesian policy, by creating a default posterior on each arm.""" 58 | super(DiscountedBayesianIndexPolicy, self).__init__(nbArms, posterior=posterior, lower=lower, amplitude=amplitude, gamma=gamma) 59 | assert 0 < gamma <= 1, "Error: for a DiscountedBayesianIndexPolicy policy, the discount factor has to be in [0,1], but it was {}.".format(gamma) # DEBUG 60 | if gamma == 1: 61 | print("Warning: gamma = 1 is stupid, just use a regular Beta posterior!") # DEBUG 62 | self.gamma = gamma #: Discount factor :math:`\gamma\in(0,1)`. 63 | 64 | def __str__(self): 65 | """ -> str""" 66 | return r"{}($\gamma={:.5g}${})".format(self.__class__.__name__, self.gamma, self._posterior_name if self._posterior_name != "DiscountedBeta" else "") 67 | 68 | def getReward(self, arm, reward): 69 | """ Update the posterior on each arm, with the normalized reward.""" 70 | self.posterior[arm].update((reward - self.lower) / self.amplitude) 71 | # DONE we should update the other posterior with "no observation" 72 | for otherArm in range(self.nbArms): 73 | if otherArm != arm: 74 | self.posterior[arm].discount() 75 | self.t += 1 -------------------------------------------------------------------------------- /SMPyBandits/Policies/DiscountedThompson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Discounted Thompson (Bayesian) index policy. 3 | 4 | - By default, it uses a DiscountedBeta posterior (:class:`Policies.Posterior.DiscountedBeta`), one by arm. 5 | - Reference: [["Taming Non-stationary Bandits: A Bayesian Approach", Vishnu Raj & Sheetal Kalyani, arXiv:1707.09727](https://arxiv.org/abs/1707.09727)]. 6 | 7 | .. warning:: This is still highly experimental! 
8 | """ 9 | from __future__ import division, print_function # Python 2 compatibility 10 | 11 | __author__ = "Lilian Besson" 12 | __version__ = "0.9" 13 | 14 | try: 15 | from .DiscountedBayesianIndexPolicy import DiscountedBayesianIndexPolicy 16 | except (ImportError, SystemError): 17 | from DiscountedBayesianIndexPolicy import DiscountedBayesianIndexPolicy 18 | 19 | 20 | class DiscountedThompson(DiscountedBayesianIndexPolicy): 21 | """The DiscountedThompson (Bayesian) index policy. 22 | 23 | - By default, it uses a DiscountedBeta posterior (:class:`Policies.Posterior.DiscountedBeta`), one by arm. 24 | - Reference: [["Taming Non-stationary Bandits: A Bayesian Approach", Vishnu Raj & Sheetal Kalyani, arXiv:1707.09727](https://arxiv.org/abs/1707.09727)]. 25 | """ 26 | 27 | def computeIndex(self, arm): 28 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, by sampling from the DiscountedBeta posterior. 29 | 30 | .. math:: 31 | A(t) &\sim U(\arg\max_{1 \leq k \leq K} I_k(t)),\\ 32 | I_k(t) &\sim \mathrm{Beta}(1 + \widetilde{S_k}(t), 1 + \widetilde{F_k}(t)). 33 | 34 | - It keeps :math:`\widetilde{S_k}(t)` and :math:`\widetilde{F_k}(t)` the discounted counts of successes and failures (S and F), for each arm k. 35 | 36 | - But instead of using :math:`\widetilde{S_k}(t) = S_k(t)` and :math:`\widetilde{N_k}(t) = N_k(t)`, they are updated at each time step using the discount factor :math:`\gamma`: 37 | 38 | .. math:: 39 | 40 | \widetilde{S_{A(t)}}(t+1) &= \gamma \widetilde{S_{A(t)}}(t) + r(t),\\ 41 | \widetilde{S_{k'}}(t+1) &= \gamma \widetilde{S_{k'}}(t), \forall k' \neq A(t). 42 | 43 | .. math:: 44 | 45 | \widetilde{F_{A(t)}}(t+1) &= \gamma \widetilde{F_{A(t)}}(t) + (1 - r(t)),\\ 46 | \widetilde{F_{k'}}(t+1) &= \gamma \widetilde{F_{k'}}(t), \forall k' \neq A(t). 47 | """ 48 | return self.posterior[arm].sample() 49 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/EmpiricalMeans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The naive Empirical Means policy for bounded bandits: like UCB but without a bias correction term. Note that it is equal to UCBalpha with alpha=0, only quicker.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.1" 7 | 8 | import numpy as np 9 | np.seterr(divide='ignore', invalid='ignore') # XXX dangerous in general, controlled here! 10 | 11 | try: 12 | from .IndexPolicy import IndexPolicy 13 | except ImportError: 14 | from IndexPolicy import IndexPolicy 15 | 16 | 17 | class EmpiricalMeans(IndexPolicy): 18 | """ The naive Empirical Means policy for bounded bandits: like UCB but without a bias correction term. Note that it is equal to UCBalpha with alpha=0, only quicker.""" 19 | 20 | def computeIndex(self, arm): 21 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 22 | 23 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)}. 
24 | """ 25 | if self.pulls[arm] < 1: 26 | return float('+inf') 27 | else: 28 | return self.rewards[arm] / self.pulls[arm] 29 | 30 | def computeAllIndex(self): 31 | """ Compute the current indexes for all arms, in a vectorized manner.""" 32 | indexes = self.rewards / self.pulls 33 | indexes[self.pulls < 1] = float('+inf') 34 | self.index[:] = indexes 35 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/.gitignore: -------------------------------------------------------------------------------- 1 | # automatically generated with cython for kullback_cython.pyx 2 | kullback.c 3 | kullback_cython.c 4 | build/ 5 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/KLempUCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Empirical KL-UCB algorithm non-parametric policy. 3 | Reference: [Maillard, Munos & Stoltz - COLT, 2011], [Cappé, Garivier, Maillard, Munos & Stoltz, 2012]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Olivier Cappé, Aurélien Garivier, Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 11 | from sys import path 12 | from os.path import dirname 13 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 14 | import numpy as np 15 | 16 | try: 17 | from .kullback import maxEV # XXX Not detected as in the kullback.py file ? 18 | from .IndexPolicy import IndexPolicy 19 | except ImportError: 20 | from kullback import maxEV # XXX Not detected as in the kullback.py file ? 21 | from IndexPolicy import IndexPolicy 22 | 23 | 24 | class KLempUCB(IndexPolicy): 25 | """ The Empirical KL-UCB algorithm non-parametric policy. 26 | References: [Maillard, Munos & Stoltz - COLT, 2011], [Cappé, Garivier, Maillard, Munos & Stoltz, 2012]. 27 | """ 28 | 29 | def __init__(self, nbArms, maxReward=1., lower=0., amplitude=1.): 30 | super(KLempUCB, self).__init__(nbArms, lower=lower, amplitude=amplitude) 31 | self.c = 1 #: Parameter c 32 | self.maxReward = maxReward #: Known upper bound on the rewards 33 | self.pulls = np.zeros(self.nbArms, dtype=int) #: Keep track of pulls of each arm 34 | #: UNBOUNDED dictionnary for each arm: keep track of how many observation of each rewards were seen. 35 | #: Warning: KLempUCB works better for *discrete* distributions! 36 | self.obs = [dict()] * self.nbArms 37 | 38 | def startGame(self): 39 | """ Initialize the policy for a new game.""" 40 | self.t = 0 41 | self.pulls.fill(0) 42 | for arm in range(self.nbArms): 43 | self.obs[arm] = {self.maxReward: 0} 44 | 45 | def computeIndex(self, arm): 46 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k.""" 47 | if self.pulls[arm] < 1: 48 | return float('+infinity') 49 | else: 50 | return self._KLucb(self.obs[arm], self.c * np.log(self.t) / self.pulls[arm]) 51 | 52 | def getReward(self, arm, reward): 53 | """ Give a reward: increase t, pulls, and update count of observations for that arm.""" 54 | self.t += 1 55 | self.pulls[arm] += 1 56 | self.obs[arm][reward] = 1 + self.obs[arm].get(reward, 0) 57 | 58 | # FIXME this does not work apparently... 
59 | @staticmethod 60 | def _KLucb(obs, klMax, debug=False): 61 | """ Optimization method.""" 62 | p = np.array(list(obs.values()), dtype=float) 63 | p /= np.sum(p) 64 | v = np.array(list(obs.keys()), dtype=float) 65 | if debug: 66 | print("Calling maxEV(", p, ", ", v, ", ", klMax, ") ...") 67 | q = maxEV(p, v, klMax) 68 | # if debug: 69 | # q2 = kbp.maxEV(p, v, klMax) 70 | # if max(abs(q - q2)) > 1e-8: 71 | # print("ERROR: for p=", p, " ,v = ", v, " and klMax = ", klMax, " : ") 72 | # print("q = ", q) 73 | # print("q2 = ", q2) 74 | # print("_____________________________") 75 | # print("q = ", q) 76 | return np.dot(q, v) 77 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/Makefile: -------------------------------------------------------------------------------- 1 | # Basic Makefile to compile a Cython extension. 2 | # It is used to compile the cython_extensions extension, by running 'make cython_extensions' 3 | 4 | cython_extensions3: cython_extensions 5 | cython_extensions: 6 | python3 setup.py build_ext --inplace 7 | -cp -vf SMPyBandits/Policies/Experimentals/*.so ./ 8 | -chmod -x ./*.so 9 | -chmod g-w ./*.so 10 | -chmod o-w ./*.so 11 | -ls -larth ./*.so 12 | -rm -vfr ./build ./*.c 13 | # -mv -vf ./SMPyBandits /tmp/ 14 | 15 | cython_extensions2: 16 | python2 setup.py build_ext --inplace 17 | -cp -vf SMPyBandits/Policies/Experimentals/*.so ./ 18 | -chmod -x ./*.so 19 | -chmod g-w ./*.so 20 | -chmod o-w ./*.so 21 | -ls -larth ./*.so 22 | -rm -vfr ./build ./*.c 23 | # -mv -vf ./SMPyBandits /tmp/ 24 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/README.md: -------------------------------------------------------------------------------- 1 | # [Single-Player policies](https://smpybandits.github.io/docs/Policies.Experimentals.html) 2 | > See here the documentation: [docs/Policies.Experimentals](https://smpybandits.github.io/docs/Policies.Experimentals.html) 3 | 4 | ## List of experimental policies 5 | ``Policies.Experimentals.Experimentals`` module : contains experimental or unfinished (single-player) bandits algorithms: 6 | 7 | - Index based UCB algorithms: [`UCBlog10`](UCBlog10.py), [`UCBwrong`](UCBwrong.py), [`UCBlog10alpha`](UCBlog10alpha.py), [`UCBcython`](UCBcython.py), [`UCBjulia`](UCBjulia.py) (with [`UCBjulia.jl`](UCBjulia.jl)), 8 | 9 | - Based on Kullback-Leibler divergence: [`klUCBlog10`](klUCBlog10.py), [`klUCBloglog10`](klUCBloglog10.py), 10 | 11 | - Empirical KL-UCB algorithm: [`KLempUCB`](KLempUCB.py) (does not work with the C optimized version of [`kullback`](kullback.py), 12 | 13 | - An *experimental* policy, using Unsupervised Learning: [`UnsupervisedLearning`](UnsupervisedLearning.py), 14 | 15 | - An *experimental* policy, using Black-box optimization: [`BlackBoxOpt`](BlackBoxOpt.py), 16 | 17 | - Bayesian algorithms: [`ThompsonRobust`](ThompsonRobust.py), 18 | 19 | - **New!** The UCBoost (Upper Confidence bounds with Boosting) policies, first with no boosting, in module [`UCBoost_faster`](UCBoost_faster.py): `UCBoost_faster.UCB_sq`, `UCBoost_faster.UCB_bq`, `UCBoost_faster.UCB_h`, `UCBoost_faster.UCB_lb`, `UCBoost_faster.UCB_t`, and then the ones with non-adaptive boosting: `UCBoost_faster.UCBoost_bq_h_lb`, `UCBoost_faster.UCBoost_bq_h_lb_t`, `UCBoost_faster.UCBoost_bq_h_lb_t_sq`, `UCBoost_faster.UCBoost`, and finally the epsilon-approximation boosting with `UCBoost_faster.UCBoostEpsilon`. These versions use Cython for some functions. 
20 | 21 | - **New!** The UCBoost (Upper Confidence bounds with Boosting) policies, first with no boosting, in module [`UCBoost_cython`](UCBoost_cython.py): `UCBoost_cython.UCB_sq`, `UCBoost_cython.UCB_bq`, `UCBoost_cython.UCB_h`, `UCBoost_cython.UCB_lb`, `UCBoost_cython.UCB_t`, and then the ones with non-adaptive boosting: `UCBoost_cython.UCBoost_bq_h_lb`, `UCBoost_cython.UCBoost_bq_h_lb_t`, `UCBoost_cython.UCBoost_bq_h_lb_t_sq`, `UCBoost_cython.UCBoost`, and finally the epsilon-approximation boosting with `UCBoost_cython.UCBoostEpsilon`. These versions use Cython for the whole code. 22 | 23 | 24 | ## API 25 | All policies have the same interface, as described in [`BasePolicy`](../BasePolicy.py), 26 | in order to use them in any experiment with the following approach: 27 | 28 | ```python 29 | my_policy = Policy(nbArms) 30 | my_policy.startGame() # start the game 31 | for t in range(T): 32 | chosen_arm_t = k_t = my_policy.choice() # choose one arm 33 | reward_t = arms[k_t].draw() # sample a reward from the chosen arm (arms = your list of Arm objects) 34 | my_policy.getReward(k_t, reward_t) # give it to the policy 35 | ``` 36 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/ThompsonRobust.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """The Thompson (Bayesian) index policy, where each index is the average of several samples from the posterior (10 by default). By default, it uses a Beta posterior. 3 | Reference: [Thompson - Biometrika, 1933]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.6" 9 | 10 | import numpy as np 11 | 12 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 13 | from sys import path 14 | from os.path import dirname 15 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 16 | try: 17 | from .Thompson import Thompson 18 | from .Posterior import Beta 19 | except ImportError: 20 | from Thompson import Thompson 21 | from Posterior import Beta 22 | 23 | 24 | #: Default value of how many indexes are computed by sampling the posterior 25 | #: for the ThompsonRobust variant. 26 | AVERAGEON = 10 27 | 28 | 29 | class ThompsonRobust(Thompson): 30 | """The Thompson (Bayesian) index policy, where each index is the average of several samples from the posterior (``averageOn``, 10 by default). By default, it uses a Beta posterior. 31 | Reference: [Thompson - Biometrika, 1933]. 32 | """ 33 | 34 | def __init__(self, nbArms, posterior=Beta, averageOn=AVERAGEON, lower=0., amplitude=1.): 35 | super(ThompsonRobust, self).__init__(nbArms, posterior=posterior, lower=lower, amplitude=amplitude) 36 | assert averageOn >= 1, "Error: invalid value for 'averageOn' parameter for ThompsonRobust, should be >= 1." # DEBUG 37 | self.averageOn = averageOn #: How many indexes are computed before averaging 38 | 39 | def __str__(self): 40 | return "%s(averageOn = %i)" % (self.__class__.__name__, self.averageOn) 41 | 42 | def computeIndex(self, arm): 43 | r""" Compute the current index for this arm, by sampling the posterior ``averageOn`` times and returning the average of the sampled indexes. 44 | 45 | At time t and after :math:`N_k(t)` pulls of arm k, giving :math:`S_k(t)` rewards of 1, by sampling from the Beta posterior and averaging: 46 | 47 | .. math:: 48 | 49 | I_k(t) &= \frac{1}{\mathrm{averageOn}} \sum_{i=1}^{\mathrm{averageOn}} I_k^{(i)}(t), \\ 50 | I_k^{(i)}(t) &\sim \mathrm{Beta}(1 + S_k(t), 1 + N_k(t) - S_k(t)).
51 | """ 52 | return np.mean([self.posterior[arm].sample() for _ in range(self.averageOn)]) 53 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBcython.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB1 (UCB-alpha) index policy, using a Cython extension. 3 | 4 | - Reference: [Auer et al. 02]. 5 | 6 | .. warning:: 7 | 8 | This extension should be used with the ``setup.py`` script, by running:: 9 | 10 | $ python setup.py build_ext --inplace 11 | 12 | You can also use [pyximport](http://docs.cython.org/en/latest/src/tutorial/cython_tutorial.html#pyximport-cython-compilation-for-developers) to import the ``kullback_cython`` module transparently: 13 | 14 | >>> import pyximport; pyximport.install() # instantaneous # doctest: +ELLIPSIS 15 | (None, ) 16 | >>> from UCBcython import * # takes about two seconds 17 | """ 18 | from __future__ import division, print_function # Python 2 compatibility 19 | 20 | __author__ = "Lilian Besson" 21 | __version__ = "0.9" 22 | 23 | from libc.math cimport log, sqrt, exp, ceil, floor 24 | 25 | import numpy as np 26 | # cimport numpy as np # WARNING might be deprecated 27 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 28 | from sys import path; path.insert(0, '..') 29 | 30 | try: 31 | # from IndexPolicy import IndexPolicy 32 | import IndexPolicy as INDEXPOLICY 33 | IndexPolicy = INDEXPOLICY.IndexPolicy 34 | except ImportError: 35 | from .IndexPolicy import IndexPolicy 36 | 37 | try: 38 | import UCB as UCBMODULE 39 | UCB = UCBMODULE.UCB 40 | except ImportError: 41 | from .UCB import UCB 42 | 43 | #: Default parameter for alpha 44 | cdef float ALPHA 45 | ALPHA = 1 46 | ALPHA = 4 47 | 48 | 49 | cdef float UCBindex(float reward, float pull, float t, int arm, float alpha=ALPHA): 50 | if pull < 1: 51 | return float('+inf') 52 | else: 53 | return (reward / pull) + sqrt((alpha * log(t)) / (2 * pull)) 54 | 55 | 56 | class UCBcython(UCB): 57 | """ The UCB1 (UCB-alpha) index policy, using a Cython extension. 58 | 59 | - Reference: [Auer et al. 02]. 60 | """ 61 | 62 | def __init__(self, nbArms, alpha=ALPHA, lower=0., amplitude=1.): 63 | super(UCBcython, self).__init__(nbArms, lower=lower, amplitude=amplitude) 64 | assert alpha >= 0, "Error: the alpha parameter for UCBcython class has to be >= 0." # DEBUG 65 | self.alpha = alpha #: Parameter alpha 66 | 67 | def __str__(self): 68 | return r"UCBcython($\alpha={:.3g}$)".format(self.alpha) 69 | 70 | def computeIndex(self, arm): 71 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 72 | 73 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{\alpha \log(t)}{2 N_k(t)}}. 
74 | """ 75 | return UCBindex(self.rewards[arm], self.pulls[arm], self.t, self.alpha) 76 | # if self.pulls[arm] < 1: 77 | # return float('+inf') 78 | # else: 79 | # return (self.rewards[arm] / self.pulls[arm]) + sqrt((self.alpha * log(self.t)) / (2 * self.pulls[arm])) 80 | 81 | def computeAllIndex(self): 82 | """ Compute the current indexes for all arms, in a vectorized manner.""" 83 | for arm in range(self.nbArms): 84 | self.index[arm] = self.computeIndex(arm) 85 | # indexes = (self.rewards / self.pulls) + np.sqrt((self.alpha * np.log(self.t)) / (2 * self.pulls)) 86 | # indexes[self.pulls < 1] = float('+inf') 87 | # self.index[:] = indexes 88 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBjulia.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | """ 3 | A small Julia module that defines a simple function, to be used in UCBjulia.py (with pyjulia). 4 | """ 5 | 6 | # Small Julia module to wrap the function that computes a UCB index 7 | module UCBjulia 8 | function index(rewards, pulls, t, arm, alpha=4) 9 | if pulls[arm] < 1 10 | return Inf 11 | else 12 | return (rewards[arm] / pulls[arm]) + sqrt((alpha * log(t)) / (2 * pulls[arm])) 13 | end 14 | end 15 | end 16 | 17 | # Small Julia function that computes a UCB index 18 | function index(rewards, pulls, t, arm, alpha=4) 19 | if pulls[arm] < 1 20 | return Inf 21 | else 22 | return (rewards[arm] / pulls[arm]) + sqrt((alpha * log(t)) / (2 * pulls[arm])) 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBjulia.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB policy for bounded bandits, with UCB indexes computed with Julia. 3 | Reference: [Lai & Robbins, 1985]. 4 | 5 | .. warning:: 6 | 7 | Using a Julia function *from* Python will not speed up anything, as there is a lot of overhead in the "bridge" protocol used by pyjulia. 8 | The idea of using naively a tiny Julia function to speed up computations is basically useless. 9 | 10 | A naive benchmark showed that in this approach, :class:`UCBjulia` (used withing Python) is about 125 times slower (!) than :class:`UCB`. 11 | 12 | .. warning:: This is only experimental, and purely useless. See https://github.com/SMPyBandits/SMPyBandits/issues/98 13 | """ 14 | from __future__ import division, print_function # Python 2 compatibility 15 | 16 | __author__ = "Lilian Besson" 17 | __version__ = "0.9" 18 | 19 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 20 | from sys import path 21 | from os.path import dirname 22 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 23 | try: 24 | from .IndexPolicy import IndexPolicy 25 | except ImportError: 26 | from IndexPolicy import IndexPolicy 27 | 28 | 29 | class UCBjulia(IndexPolicy): 30 | """ The UCB policy for bounded bandits, with UCB indexes computed with Julia. 31 | Reference: [Lai & Robbins, 1985]. 32 | 33 | .. warning:: This is only experimental, and purely useless. 
See https://github.com/SMPyBandits/SMPyBandits/issues/98 34 | """ 35 | 36 | def __init__(self, nbArms, lower=0., amplitude=1.): 37 | """ Will fail directly if the bridge with julia is unavailable or buggy.""" 38 | super(UCBjulia, self).__init__(nbArms, lower=lower, amplitude=amplitude) 39 | self.t = 0 40 | # Importing the julia module and creating the bridge 41 | try: 42 | import julia 43 | except ImportError as e: 44 | print("Error: unable to load the 'julia' Python module. Install with 'pip install julia', or see https://github.com/JuliaPy/pyjulia/") # DEBUG 45 | raise e 46 | _j = julia.Julia() 47 | try: 48 | self._index_function = _j.evalfile("Policies/UCBjulia.jl") 49 | except RuntimeError: 50 | try: 51 | self._index_function = _j.evalfile("UCBjulia.jl") 52 | except RuntimeError: 53 | raise ValueError("Error: Unable to load 'UCBjulia.jl' julia file.") # WARNING 54 | try: 55 | self._index_function([1], [1], 1, 1) 56 | except (RuntimeError, ValueError): 57 | raise ValueError("Error: the index function loaded from 'UCBjulia.jl' is bugged or unavailable.") # WARNING 58 | 59 | def computeIndex(self, arm): 60 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 61 | 62 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{2 \log(t)}{N_k(t)}}. 63 | """ 64 | # WARNING: the 'arm + 1' part comes from the difference between 0-based indexes 65 | # for Python and the 1-based indexes in Julia. The rest works pretty well! 66 | return self._index_function(self.rewards, self.pulls, self.t, arm + 1) 67 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBlog10.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCB policy for bounded bandits, using :math:`\log10(t)` and not :math:`\log(t)` for UCB index. 3 | Reference: [Lai & Robbins, 1985]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import sqrt, log10 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 15 | from sys import path 16 | from os.path import dirname 17 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 18 | 19 | try: 20 | from .IndexPolicy import IndexPolicy 21 | except ImportError: 22 | from IndexPolicy import IndexPolicy 23 | 24 | 25 | class UCBlog10(IndexPolicy): 26 | r""" The UCB policy for bounded bandits, using :math:`\log10(t)` and not :math:`\log(t)` for UCB index. 27 | Reference: [Lai & Robbins, 1985]. 28 | """ 29 | 30 | def computeIndex(self, arm): 31 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 32 | 33 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{2 \log_{10}(t)}{N_k(t)}}. 
34 | """ 35 | if self.pulls[arm] < 1: 36 | return float('+inf') 37 | else: 38 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((2 * log10(self.t)) / self.pulls[arm]) 39 | 40 | def computeAllIndex(self): 41 | """ Compute the current indexes for all arms, in a vectorized manner.""" 42 | indexes = (self.rewards / self.pulls) + np.sqrt((2 * np.log10(self.t)) / self.pulls) 43 | indexes[self.pulls < 1] = float('+inf') 44 | self.index[:] = indexes 45 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBlog10alpha.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCB1 (UCB-alpha) index policy, modified to take a random permutation order for the initial exploration of each arm (reduce collisions in the multi-players setting). 3 | Note: :math:`\log10(t)` and not :math:`\log(t)` for UCB index. 4 | Reference: [Auer et al. 02]. 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.2" 10 | 11 | from math import sqrt, log10 12 | import numpy as np 13 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 14 | 15 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 16 | from sys import path 17 | from os.path import dirname 18 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 19 | try: 20 | from .UCBlog10 import UCBlog10 21 | except ImportError: 22 | from UCBlog10 import UCBlog10 23 | 24 | #: Default parameter for alpha 25 | ALPHA = 4 26 | ALPHA = 1 27 | 28 | 29 | class UCBlog10alpha(UCBlog10): 30 | r""" The UCB1 (UCB-alpha) index policy, modified to take a random permutation order for the initial exploration of each arm (reduce collisions in the multi-players setting). 31 | Note: :math:`\log10(t)` and not :math:`\log(t)` for UCB index. 32 | Reference: [Auer et al. 02]. 33 | """ 34 | 35 | def __init__(self, nbArms, alpha=ALPHA, lower=0., amplitude=1.): 36 | super(UCBlog10alpha, self).__init__(nbArms, lower=lower, amplitude=amplitude) 37 | assert alpha >= 0, "Error: the alpha parameter for UCBalpha class has to be >= 0." # DEBUG 38 | self.alpha = alpha #: Parameter alpha 39 | 40 | def __str__(self): 41 | return r"UCB($\alpha={:.3g}$, {})".format(self.alpha, r"$\log_{10}$") 42 | 43 | def computeIndex(self, arm): 44 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 45 | 46 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{\alpha \log_{10}(t)}{2 N_k(t)}}. 47 | """ 48 | if self.pulls[arm] < 1: 49 | return float('+inf') 50 | else: 51 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((self.alpha * log10(self.t)) / (2 * self.pulls[arm])) 52 | 53 | def computeAllIndex(self): 54 | """ Compute the current indexes for all arms, in a vectorized manner.""" 55 | indexes = (self.rewards / self.pulls) + np.sqrt((self.alpha * np.log10(self.t)) / (2 * self.pulls)) 56 | indexes[self.pulls < 1] = float('+inf') 57 | self.index[:] = indexes 58 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBwrong.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCBwrong policy for bounded bandits, like UCB but with a typo on the estimator of means: 3 | :math:`\frac{X_k(t)}{t}` is used instead of :math:`\frac{X_k(t)}{N_k(t)}`. 
4 | 5 | One paper of W.Jouini, C.Moy and J.Palicot from 2009 contained this typo, I reimplemented it just to check that: 6 | 7 | - its performance is worse than simple UCB, 8 | - but not that bad... 9 | """ 10 | from __future__ import division, print_function # Python 2 compatibility 11 | 12 | __author__ = "Lilian Besson" 13 | __version__ = "0.1" 14 | 15 | from math import sqrt, log 16 | import numpy as np 17 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 18 | 19 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 20 | from sys import path 21 | from os.path import dirname 22 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 23 | try: 24 | from .IndexPolicy import IndexPolicy 25 | except ImportError: 26 | from IndexPolicy import IndexPolicy 27 | 28 | 29 | class UCBwrong(IndexPolicy): 30 | """ The UCBwrong policy for bounded bandits, like UCB but with a typo on the estimator of means. 31 | 32 | One paper of W.Jouini, C.Moy and J.Palicot from 2009 contained this typo, I reimplemented it just to check that: 33 | 34 | - its performance is worse than simple UCB 35 | - but not that bad... 36 | """ 37 | 38 | def computeIndex(self, arm): 39 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 40 | 41 | .. math:: I_k(t) = \frac{X_k(t)}{t} + \sqrt{\frac{2 \log(t)}{N_k(t)}}. 42 | """ 43 | if self.pulls[arm] < 1: 44 | return float('+inf') 45 | else: 46 | # XXX Volontary typo, wrong mean estimate 47 | return (self.rewards[arm] / self.t) + sqrt((2 * log(self.t)) / self.pulls[arm]) 48 | 49 | def computeAllIndex(self): 50 | """ Compute the current indexes for all arms, in a vectorized manner.""" 51 | indexes = (self.rewards / self.t) + np.sqrt((2 * np.log(self.t)) / self.pulls) 52 | indexes[self.pulls < 1] = float('+inf') 53 | self.index[:] = indexes 54 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/klUCBlog10.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The generic kl-UCB policy for one-parameter exponential distributions. 3 | By default, it assumes Bernoulli arms. 4 | Note: using :math:`\log10(t)` and not :math:`\log(t)` for the KL-UCB index. 5 | Reference: [Garivier & Cappé - COLT, 2011]. 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.5" 11 | 12 | from math import log10 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 17 | from sys import path 18 | from os.path import dirname 19 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 20 | try: 21 | from .klUCB import klUCB 22 | except ImportError: 23 | from klUCB import klUCB 24 | 25 | 26 | class klUCBlog10(klUCB): 27 | r""" The generic kl-UCB policy for one-parameter exponential distributions. 28 | By default, it assumes Bernoulli arms. 29 | Note: using :math:`\log10(t)` and not :math:`\log(t)` for the KL-UCB index. 30 | Reference: [Garivier & Cappé - COLT, 2011]. 31 | """ 32 | 33 | def __str__(self): 34 | return r"kl-UCB({}{}{})".format("" if self.c == 1 else r"$c={:.3g}$, ".format(self.c), r"$\log_{10}$, ", self.klucb.__name__[5:]) 35 | 36 | def computeIndex(self, arm): 37 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 38 | 39 | .. 
math:: 40 | 41 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 42 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{c \log_{10}(t)}{N_k(t)} \right\},\\ 43 | I_k(t) &= U_k(t). 44 | 45 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 46 | and c is the parameter (default to 1). 47 | """ 48 | if self.pulls[arm] < 1: 49 | return float('+inf') 50 | else: 51 | # XXX We could adapt tolerance to the value of self.t 52 | return self.klucb(self.rewards[arm] / self.pulls[arm], self.c * log10(self.t) / self.pulls[arm], self.tolerance) 53 | 54 | def computeAllIndex(self): 55 | """ Compute the current indexes for all arms, in a vectorized manner.""" 56 | indexes = self.klucb_vect(self.rewards / self.pulls, self.c * np.log10(self.t) / self.pulls, self.tolerance) 57 | indexes[self.pulls < 1] = float('+inf') 58 | self.index[:] = indexes 59 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/klUCBloglog10.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The generic kl-UCB policy for one-parameter exponential distributions. 3 | By default, it assumes Bernoulli arms. 4 | Note: using :math:`\log10(t)` and not :math:`\log(t)` for the KL-UCB index. 5 | Reference: [Garivier & Cappé - COLT, 2011]. 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.5" 11 | 12 | from math import log10 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 17 | from sys import path 18 | from os.path import dirname 19 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 20 | try: 21 | from .klUCB import klUCB 22 | except ImportError: 23 | from klUCB import klUCB 24 | 25 | 26 | class klUCBloglog10(klUCB): 27 | r""" The generic kl-UCB policy for one-parameter exponential distributions. 28 | By default, it assumes Bernoulli arms. 29 | Note: using :math:`\log10(t)` and not :math:`\log(t)` for the KL-UCB index. 30 | Reference: [Garivier & Cappé - COLT, 2011]. 31 | """ 32 | 33 | def __str__(self): 34 | return r"kl-UCB({}{}{})".format("" if self.c == 1 else r"$c={:.3g}$, ".format(self.c), r"$\log_{10}\log_{10}$, ", self.klucb.__name__[5:]) 35 | 36 | def computeIndex(self, arm): 37 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 38 | 39 | .. math:: 40 | 41 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 42 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{\log_{10}(t) + c \log(\max(1, \log_{10}(t)))}{N_k(t)} \right\},\\ 43 | I_k(t) &= U_k(t). 44 | 45 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 46 | and c is the parameter (default to 1). 
47 | """ 48 | if self.pulls[arm] < 1: 49 | return float('+inf') 50 | else: 51 | # XXX We could adapt tolerance to the value of self.t 52 | return self.klucb(self.rewards[arm] / self.pulls[arm], (log10(self.t) + self.c * log10(max(1, log10(self.t)))) / self.pulls[arm], self.tolerance) 53 | 54 | def computeAllIndex(self): 55 | """ Compute the current indexes for all arms, in a vectorized manner.""" 56 | indexes = self.klucb_vect(self.rewards / self.pulls, (np.log10(self.t) + self.c * np.log10(np.maximum(1., np.log10(self.t)))) / self.pulls, self.tolerance) 57 | indexes[self.pulls < 1] = float('+inf') 58 | self.index[:] = indexes 59 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Basic setup.py to compile a Cython extension. 4 | It is used to compile the ``UCBoost_faster_cython``, ``UCBoost_cython``, ``UCBcython`` extension, by running:: 5 | 6 | $ python setup.py build_ext --inplace 7 | 8 | You can also use [pyximport](http://docs.cython.org/en/latest/src/tutorial/cython_tutorial.html#pyximport-cython-compilation-for-developers) to import the ``kullback_cython`` module transparently: 9 | 10 | >>> import pyximport; pyximport.install() 11 | >>> import kullback_cython as kullback 12 | >>> # then use kullback.klucbBern or others, as if they came from the pure Python version! 13 | """ 14 | from distutils.core import setup 15 | from distutils.extension import Extension 16 | from Cython.Build import cythonize 17 | 18 | extensions = [ 19 | # Extension("UCBoost_faster_cython", ["UCBoost_faster_cython.pyx"]), 20 | # XXX also build the extension with full name? 21 | Extension("SMPyBandits.Policies.Experimentals.UCBoost_faster_cython", ["UCBoost_faster_cython.pyx"]), 22 | # Extension("UCBoost_cython", ["UCBoost_cython.pyx"]), 23 | # XXX also build the extension with full name? 24 | Extension("SMPyBandits.Policies.Experimentals.UCBoost_cython", ["UCBoost_cython.pyx"]), 25 | # Extension("UCBcython", ["UCBcython.pyx"]), 26 | # XXX also build the extension with full name? 27 | Extension("SMPyBandits.Policies.Experimentals.UCBcython", ["UCBcython.pyx"]), 28 | ] 29 | 30 | setup( 31 | ext_modules = cythonize(extensions, compiler_directives={ 32 | 'embedsignature': True, 33 | 'language_level': 3, 34 | 'warn.undeclared': True, 35 | 'warn.unreachable': True, 36 | 'warn.maybe_uninitialized': True, 37 | 'warn.unused': True, 38 | 'warn.unused_arg': True, 39 | 'warn.unused_result': True, 40 | 'warn.multiple_declarators': True, 41 | }) 42 | ) 43 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/GreedyOracle.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien Seznec 3 | 4 | Oracle and near-minimax policy for rotting bandits without noise. 5 | 6 | Reference: [Heidari et al., 2016, https://www.ijcai.org/Proceedings/16/Papers/224.pdf] 7 | Tight Policy Regret Bounds for Improving and Decaying Bandits. 8 | Hoda Heidari, Michael Kearns, Aaron Roth. 9 | International Joint Conference on Artificial Intelligence (IJCAI) 2016, 1562. 10 | """ 11 | from .IndexPolicy import IndexPolicy 12 | import numpy as np 13 | 14 | class GreedyPolicy(IndexPolicy): 15 | """ 16 | Greedy Policy for rotting bandits (A2 in the reference below). 17 | Selects arm with best last value. 
18 | Reference: [Heidari et al., 2016, https://www.ijcai.org/Proceedings/16/Papers/224.pdf] 19 | """ 20 | def __init__(self, nbArms): 21 | super(GreedyPolicy, self).__init__(nbArms) 22 | self.last_pull = [np.inf for _ in range(nbArms)] 23 | 24 | def getReward(self, arm, reward): 25 | super(GreedyPolicy, self).getReward(arm, reward) 26 | self.last_pull[arm] = reward 27 | 28 | def computeAllIndex(self): 29 | return self.last_pull 30 | 31 | def computeIndex(self, arm): 32 | """ Return the last observed value of this arm (the index used by the greedy policy). """ 33 | return self.last_pull[arm] 34 | 35 | def startGame(self): 36 | super(GreedyPolicy, self).startGame() 37 | self.last_pull = [np.inf for _ in self.last_pull] 38 | 39 | 40 | class GreedyOracle(IndexPolicy): 41 | """ 42 | Greedy Oracle for rotting bandits (A0 in the reference below). 43 | Looks one step ahead and selects the arm with the best next value. 44 | Optimal policy for the rotting bandits problem. 45 | Reference: [Heidari et al., 2016, https://www.ijcai.org/Proceedings/16/Papers/224.pdf] 46 | """ 47 | def __init__(self, nbArms, arms): 48 | super(GreedyOracle, self).__init__(nbArms) 49 | self.arms = arms 50 | 51 | def computeIndex(self, arm): 52 | return self.arms[arm].mean 53 | 54 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/H_UCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function # Python 2 compatibility 3 | 4 | __author__ = "SlyJabiru" 5 | __version__ = "0.1" 6 | 7 | 8 | from math import sqrt, log 9 | import numpy as np 10 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 11 | 12 | 13 | try: 14 | from .StrategicIndexPolicy import StrategicIndexPolicy 15 | except ImportError: 16 | from StrategicIndexPolicy import StrategicIndexPolicy 17 | 18 | 19 | class H_UCB(StrategicIndexPolicy): 20 | def computeAgentIndex(self, agent): 21 | if self.agentPulls[agent] < 1: 22 | return float('+inf') 23 | else: 24 | return (self.agentRewards[agent] / self.agentPulls[agent]) + sqrt((2 * log(self.t)) / self.agentPulls[agent]) 25 | 26 | def computeArmIndex(self, arm): 27 | if self.armPulls[arm] < 1: 28 | return float('+inf') 29 | else: 30 | armPossession = np.cumsum(self.nbArmsPerAgents) - 1 31 | temp = (armPossession >= arm) 32 | agent = np.where(temp)[0][0] 33 | return (self.armRewards[arm] / self.armPulls[arm]) + sqrt((2 * log(self.agentPulls[agent])) / self.armPulls[arm]) 34 | 35 | def computeAllIndex(self): 36 | """ Compute the current indices for all agents and all arms, in a vectorized manner.""" 37 | agentIndices = (self.agentRewards / self.agentPulls) + np.sqrt((2 * np.log(self.t)) / self.agentPulls) 38 | 39 | agentPullsRepeated = np.repeat(self.agentPulls, self.nbArmsPerAgents) 40 | armIndices = (self.armRewards / self.armPulls) + np.sqrt((2 * np.log(agentPullsRepeated)) / self.armPulls) 41 | 42 | agentIndices[self.agentPulls < 1] = float('+inf') 43 | armIndices[self.armPulls < 1] = float('+inf') 44 | 45 | self.agentIndex[:] = agentIndices 46 | self.armIndex[:] = armIndices 47 | 48 | 49 | # --- Debugging 50 | 51 | # if __name__ == "__main__": 52 | # # Code for debugging purposes.
53 | # from doctest import testmod 54 | # print("\nTesting automatically all the docstring written in each functions of this module :") 55 | # testmod(verbose=True) 56 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/MOSS.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The MOSS policy for bounded bandits. 3 | Reference: [Audibert & Bubeck, 2010](http://www.jmlr.org/papers/volume11/audibert10a/audibert10a.pdf). 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | import numpy as np 11 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 12 | 13 | try: 14 | from .IndexPolicy import IndexPolicy 15 | except ImportError: 16 | from IndexPolicy import IndexPolicy 17 | 18 | 19 | class MOSS(IndexPolicy): 20 | """ The MOSS policy for bounded bandits. 21 | Reference: [Audibert & Bubeck, 2010](http://www.jmlr.org/papers/volume11/audibert10a/audibert10a.pdf). 22 | """ 23 | 24 | def computeIndex(self, arm): 25 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, if there is K arms: 26 | 27 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\max\left(0, \frac{\log\left(\frac{t}{K N_k(t)}\right)}{N_k(t)}\right)}. 28 | """ 29 | if self.pulls[arm] < 1: 30 | return float('+inf') 31 | else: 32 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(max(0, np.log(self.t / (self.nbArms * self.pulls[arm]))) / self.pulls[arm]) 33 | 34 | def computeAllIndex(self): 35 | """ Compute the current indexes for all arms, in a vectorized manner.""" 36 | indexes = (self.rewards / self.pulls) + np.sqrt(np.maximum(0., np.log(self.t / (self.nbArms * self.pulls))) / self.pulls) 37 | indexes[self.pulls < 1] = float('+inf') 38 | self.index[:] = indexes 39 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/MOSSAnytime.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The MOSS-Anytime policy for bounded bandits, without knowing the horizon (and no doubling trick). 3 | Reference: [Degenne & Perchet, 2016](http://proceedings.mlr.press/v48/degenne16.pdf). 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.9" 9 | 10 | import numpy as np 11 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 12 | 13 | try: 14 | from .MOSS import MOSS 15 | except ImportError: 16 | from MOSS import MOSS 17 | 18 | 19 | #: Default value for the parameter :math:`\alpha` for the MOSS-Anytime algorithm. 20 | ALPHA = 1.0 21 | 22 | 23 | class MOSSAnytime(MOSS): 24 | """ The MOSS-Anytime policy for bounded bandits, without knowing the horizon (and no doubling trick). 25 | Reference: [Degenne & Perchet, 2016](http://proceedings.mlr.press/v48/degenne16.pdf). 26 | """ 27 | 28 | def __init__(self, nbArms, alpha=ALPHA, lower=0., amplitude=1.): 29 | super(MOSSAnytime, self).__init__(nbArms, lower=lower, amplitude=amplitude) 30 | self.alpha = alpha #: Parameter :math:`\alpha \geq 0` for the computations of the index. Optimal value seems to be :math:`1.35`. 
31 | 32 | def __str__(self): 33 | return r"MOSS-Anytime($\alpha={}$)".format(self.alpha) 34 | 35 | def computeIndex(self, arm): 36 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, if there is K arms: 37 | 38 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\left(\frac{1+\alpha}{2}\right) \max\left(0, \frac{\log\left(\frac{t}{K N_k(t)}\right)}{N_k(t)}\right)}. 39 | """ 40 | if self.pulls[arm] < 1: 41 | return float('+inf') 42 | else: 43 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(((1. + self.alpha) / 2.) * max(0, np.log(self.t / (self.nbArms * self.pulls[arm]))) / self.pulls[arm]) 44 | 45 | def computeAllIndex(self): 46 | """ Compute the current indexes for all arms, in a vectorized manner.""" 47 | indexes = (self.rewards / self.pulls) + np.sqrt(((1. + self.alpha) / 2.) * np.maximum(0., np.log(self.t / (self.nbArms * self.pulls))) / self.pulls) 48 | indexes[self.pulls < 1] = float('+inf') 49 | self.index[:] = indexes 50 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/MOSSExperimental.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The MOSS-Experimental policy for bounded bandits, without knowing the horizon (and no doubling trick). 3 | Reference: [Degenne & Perchet, 2016](http://proceedings.mlr.press/v48/degenne16.pdf). 4 | 5 | .. warning:: Nothing was proved for this heuristic! 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | from numpy import sqrt, log 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | try: 17 | from .MOSS import MOSS 18 | except ImportError: 19 | from MOSS import MOSS 20 | 21 | 22 | class MOSSExperimental(MOSS): 23 | """ The MOSS-Experimental policy for bounded bandits, without knowing the horizon (and no doubling trick). 24 | Reference: [Degenne & Perchet, 2016](http://proceedings.mlr.press/v48/degenne16.pdf). 25 | """ 26 | 27 | def __str__(self): 28 | return "MOSS-Experimental" 29 | 30 | def computeIndex(self, arm): 31 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, if there is K arms: 32 | 33 | .. math:: 34 | 35 | I_k(t) &= \frac{X_k(t)}{N_k(t)} + \sqrt{ \max\left(0, \frac{\log\left(\frac{t}{\hat{H}(t)}\right)}{N_k(t)}\right)},\\ 36 | \text{where}\;\; \hat{H}(t) &:= \begin{cases} 37 | \sum\limits_{j=1, N_j(t) < \sqrt{t}}^{K} N_j(t) & \;\text{if it is}\; > 0,\\ 38 | K N_k(t) & \;\text{otherwise}\; 39 | \end{cases} 40 | 41 | .. note:: In the article, the authors do not explain this subtlety, and I don't see an argument to justify that at anytime, :math:`\hat{H}(t) > 0` ie to justify that there is always some arms :math:`j` such that :math:`0 < N_j(t) < \sqrt{t}`. 
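A small worked example of this estimator (numbers chosen only for illustration): with :math:`K = 3` arms at :math:`t = 100` (so :math:`\sqrt{t} = 10`) and pull counts :math:`(50, 45, 5)`, only the third arm satisfies :math:`N_j(t) < \sqrt{t}`, hence :math:`\hat{H}(t) = 5`; with counts :math:`(50, 40, 10)` no arm qualifies, and the fallback :math:`K N_k(t)` is used instead.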
42 | """ 43 | if self.pulls[arm] < 1: 44 | return float('+inf') 45 | else: 46 | pulls_of_suboptimal_arms = np.sum(self.pulls[self.pulls < np.sqrt(self.t)]) 47 | if pulls_of_suboptimal_arms > 0: 48 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(0.5 * max(0, np.log(self.t / pulls_of_suboptimal_arms)) / self.pulls[arm]) 49 | else: 50 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(0.5 * max(0, np.log(self.t / (self.nbArms * self.pulls[arm]))) / self.pulls[arm]) 51 | 52 | def computeAllIndex(self): 53 | """ Compute the current indexes for all arms, in a vectorized manner.""" 54 | pulls_of_suboptimal_arms = np.sum(self.pulls[self.pulls < np.sqrt(self.t)]) 55 | if pulls_of_suboptimal_arms > 0: 56 | indexes = (self.rewards / self.pulls) + np.sqrt(0.5 * np.maximum(0, np.log(self.t / pulls_of_suboptimal_arms)) / self.pulls) 57 | else: 58 | indexes = (self.rewards / self.pulls) + np.sqrt(0.5 * np.maximum(0, np.log(self.t / (self.nbArms * self.pulls))) / self.pulls) 59 | indexes[self.pulls < 1] = float('+inf') 60 | self.index[:] = indexes 61 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/MOSSH.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The MOSS-H policy for bounded bandits, with knowing the horizon. 3 | Reference: [Audibert & Bubeck, 2010](http://www.jmlr.org/papers/volume11/audibert10a/audibert10a.pdf). 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.5" 9 | 10 | import numpy as np 11 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 12 | 13 | try: 14 | from .MOSS import MOSS 15 | except ImportError: 16 | from MOSS import MOSS 17 | 18 | 19 | class MOSSH(MOSS): 20 | """ The MOSS-H policy for bounded bandits, with knowing the horizon. 21 | Reference: [Audibert & Bubeck, 2010](http://www.jmlr.org/papers/volume11/audibert10a/audibert10a.pdf). 22 | """ 23 | 24 | def __init__(self, nbArms, horizon=None, lower=0., amplitude=1.): 25 | super(MOSSH, self).__init__(nbArms, lower=lower, amplitude=amplitude) 26 | self.horizon = int(horizon) #: Parameter :math:`T` = known horizon of the experiment. 27 | 28 | def __str__(self): 29 | return r"MOSS-H($T={}$)".format(self.horizon) 30 | 31 | def computeIndex(self, arm): 32 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, if there is K arms: 33 | 34 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\max\left(0, \frac{\log\left(\frac{T}{K N_k(t)}\right)}{N_k(t)}\right)}. 35 | """ 36 | if self.pulls[arm] < 1: 37 | return float('+inf') 38 | else: 39 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(max(0, np.log(self.horizon / (self.nbArms * self.pulls[arm]))) / self.pulls[arm]) 40 | 41 | def computeAllIndex(self): 42 | """ Compute the current indexes for all arms, in a vectorized manner.""" 43 | indexes = (self.rewards / self.pulls) + np.sqrt(np.maximum(0., np.log(self.horizon / (self.nbArms * self.pulls))) / self.pulls) 44 | indexes[self.pulls < 1] = float('+inf') 45 | self.index[:] = indexes 46 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Makefile: -------------------------------------------------------------------------------- 1 | # Basic Makefile to compile a Cython extension. 
2 | # It is used to compile the cython_extensions extension, by running 'make cython_extensions' 3 | 4 | cython_extensions3: cython_extensions 5 | cython_extensions: 6 | python3 setup.py build_ext --inplace 7 | -cp -vf SMPyBandits/Policies/*.so ./ 8 | -chmod -x ./*.so 9 | -chmod g-w ./*.so 10 | -chmod o-w ./*.so 11 | -ls -larth ./*.so 12 | -rm -vfr ./build ./*.c 13 | # -mv -vf ./SMPyBandits /tmp/ 14 | 15 | cython_extensions2: 16 | python2 setup.py build_ext --inplace 17 | -cp -vf SMPyBandits/Policies/*.so ./ 18 | -chmod -x ./*.so 19 | -chmod g-w ./*.so 20 | -chmod o-w ./*.so 21 | -ls -larth ./*.so 22 | -rm -vfr ./build ./*.c 23 | # -mv -vf ./SMPyBandits /tmp/ 24 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/OCUCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Optimally Confident UCB (OC-UCB) policy for bounded stochastic bandits, with sub-Gaussian noise. 3 | 4 | - Reference: [Lattimore, 2016](https://arxiv.org/pdf/1603.08661.pdf). 5 | - There is also a horizon-dependent version, :class:`OCUCBH.OCUCBH`, from [Lattimore, 2015](https://arxiv.org/pdf/1507.07880.pdf). 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | from math import exp, sqrt, log 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | try: 17 | from .UCB import UCB 18 | except ImportError: 19 | from UCB import UCB 20 | 21 | #: Default value for parameter :math:`\eta > 1` for OCUCB. 22 | ETA = 2 23 | 24 | #: Default value for parameter :math:`\rho \in (1/2, 1]` for OCUCB. 25 | RHO = 1 26 | 27 | 28 | class OCUCB(UCB): 29 | """ The Optimally Confident UCB (OC-UCB) policy for bounded stochastic bandits, with sub-Gaussian noise. 30 | 31 | - Reference: [Lattimore, 2016](https://arxiv.org/pdf/1603.08661.pdf). 32 | """ 33 | 34 | def __init__(self, nbArms, eta=ETA, rho=RHO, lower=0., amplitude=1.): 35 | super(OCUCB, self).__init__(nbArms, lower=lower, amplitude=amplitude) 36 | assert eta > 1, "Error: parameter 'eta' for OCUCB algorithm has to be > 1." # DEBUG 37 | self.eta = eta #: Parameter :math:`\eta > 1`. 38 | assert 0.5 < rho <= 1, "Error: parameter 'rho' for OCUCB algorithm has to be in (1/2, 1]." # DEBUG 39 | self.rho = rho #: Parameter :math:`\rho \in (1/2, 1]`. 40 | 41 | def __str__(self): 42 | return r"OC-UCB($\eta={:.3g}$, $\rho={:.3g}$)".format(self.eta, self.rho) 43 | 44 | def _Bterm(self, k): 45 | r""" Compute the extra term :math:`B_k(t)` as follows: 46 | 47 | .. math:: 48 | 49 | B_k(t) &= \max\Big\{ \exp(1), \log(t), t \log(t) / C_k(t) \Big\},\\ 50 | \text{where}\; C_k(t) &= \sum_{j=1}^{K} \min\left\{ T_k(t), T_j(t)^{\rho} T_k(t)^{1 - \rho} \right\} 51 | """ 52 | t = self.t 53 | T_ = self.pulls 54 | C_kt = sum(min(T_[k], (T_[j] ** self.rho) * (T_[k] ** (1. - self.rho))) for j in range(self.nbArms)) 55 | return max([exp(1), log(t), t * log(t) / C_kt]) 56 | 57 | def _Bterms(self): 58 | r""" Compute all the extra terms, :math:`B_k(t)` for each arm k, in a naive manner, not optimized to be vectorial, but it works.""" 59 | return np.array([self._Bterm(k) for k in range(self.nbArms)]) 60 | 61 | def computeIndex(self, arm): 62 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 63 | 64 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{2 \eta \log(B_k(t))}{N_k(t)}}. 
65 | 66 | - Where :math:`\eta` is a parameter of the algorithm, 67 | - And :math:`B_k(t)` is the additional term defined above. 68 | """ 69 | if self.pulls[arm] < 1: 70 | return float('+inf') 71 | else: 72 | return (self.rewards[arm] / self.pulls[arm]) + sqrt(2 * self.eta * log(self._Bterm(arm)) / self.pulls[arm]) 73 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/PHE.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The PHE, Perturbed-History Exploration, policy for bounded bandits. 3 | 4 | - Reference: [[Perturbed-History Exploration in Stochastic Multi-Armed Bandits, by Branislav Kveton, Csaba Szepesvari, Mohammad Ghavamzadeh, Craig Boutilier, 26 Feb 2019, arXiv:1902.10089]](https://arxiv.org/abs/1902.10089) 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.9" 10 | 11 | try: 12 | from .IndexPolicy import IndexPolicy 13 | except ImportError: 14 | from IndexPolicy import IndexPolicy 15 | 16 | from math import ceil 17 | import numpy as np 18 | 19 | #: By default, :math:`a` the perturbation scale in PHE is 1, that is, at current time step t, if there is :math:`s = T_{i,t-1}` samples of arm i, PHE generates :math:`s` pseudo-rewards (of mean :math:`1/2`) 20 | DEFAULT_PERTURBATION_SCALE = 1.0 21 | 22 | 23 | class PHE(IndexPolicy): 24 | """ The PHE, Perturbed-History Exploration, policy for bounded bandits. 25 | 26 | - Reference: [[Perturbed-History Exploration in Stochastic Multi-Armed Bandits, by Branislav Kveton, Csaba Szepesvari, Mohammad Ghavamzadeh, Craig Boutilier, 26 Feb 2019, arXiv:1902.10089]](https://arxiv.org/abs/1902.10089) 27 | 28 | - They prove that PHE achieves a regret of :math:`\mathcal{O}(K \Delta^{-1} \log(T))` regret for horizon :math:`T`, and if :math:`\Delta` is the minimum gap between the expected rewards of the optimal and suboptimal arms, for :math:`a > 1`. 29 | - Note that the limit case of :math:`a=0` gives the Follow-the-Leader algorithm (FTL), known to fail. 30 | """ 31 | def __init__(self, nbArms, perturbation_scale=DEFAULT_PERTURBATION_SCALE, lower=0., amplitude=1.): 32 | assert perturbation_scale > 0, "Error: for PHE class, the parameter perturbation_scale should be > 0, it was {}.".format(perturbation_scale) # DEBUG 33 | self.perturbation_scale = perturbation_scale #: Perturbation scale, denoted :math:`a` in their paper. Should be a float or int number. With :math:`s` current samples, :math:`\lceil a s \rceil` additional pseudo-rewards are generated. 
34 | super(PHE, self).__init__(nbArms, lower=lower, amplitude=amplitude) 35 | 36 | def __str__(self): 37 | return r"PHE($a={:.3g}$)".format(self.perturbation_scale) 38 | 39 | def computeIndex(self, arm): 40 | """ Compute a randomized index by adding :math:`a` pseudo-rewards (of mean :math:`1/2`) to the current observations of this arm.""" 41 | s = self.pulls[arm] 42 | if s <= 0: 43 | return float('+inf') 44 | V_is = self.rewards[arm] 45 | number_of_perturbation = ceil(self.perturbation_scale * s) 46 | U_is = np.random.binomial(number_of_perturbation, 0.5) 47 | perturbated_mean = (V_is + U_is) / (s + number_of_perturbation) 48 | return perturbated_mean 49 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/Gamma.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Manipulate a Gamma posterior. No need for tricks to handle non-binary rewards. 3 | 4 | - See https://en.wikipedia.org/wiki/Gamma_distribution#Conjugate_prior 5 | - And https://en.wikipedia.org/wiki/Conjugate_prior#Continuous_distributions 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Emilie Kaufmann, Lilian Besson" 10 | __version__ = "0.6" 11 | 12 | try: 13 | from numpy.random import gamma as gammavariate # Faster! Yes! 14 | except ImportError: 15 | from random import gammavariate 16 | 17 | from scipy.special import gdtrix 18 | 19 | 20 | # Local imports 21 | from .Posterior import Posterior 22 | 23 | 24 | class Gamma(Posterior): 25 | """ Manipulate a Gamma posterior.""" 26 | 27 | def __init__(self, k=1, lmbda=1): 28 | r"""Create a Gamma posterior, :math:`\Gamma(k, \lambda)`, with :math:`k=1` and :math:`\lambda=1` by default.""" 29 | assert k > 0, "Error: parameter 'k' for Beta posterior has to be > 0." 30 | self._k = k 31 | self.k = k #: Parameter :math:`k` 32 | assert lmbda > 0, "Error: parameter 'lmbda' for Beta posterior has to be > 0." 33 | self._lmbda = lmbda 34 | self.lmbda = lmbda #: Parameter :math:`\lambda` 35 | 36 | def __str__(self): 37 | return "Gamma({}, {})".format(self.k, self.lmbda) 38 | 39 | def reset(self, k=None, lmbda=None): 40 | """Reset k and lmbda, both to 1 as when creating a new default Gamma.""" 41 | if k is None: 42 | self.k = self._k 43 | if lmbda is None: 44 | self.lmbda = self._lmbda 45 | 46 | def sample(self): 47 | """Get a random sample from the Beta posterior (using :func:`numpy.random.gammavariate`). 48 | 49 | - Used only by :class:`Thompson` Sampling and :class:`AdBandits` so far. 50 | """ 51 | return gammavariate(self.k, 1. / self.lmbda) 52 | 53 | def quantile(self, p): 54 | """Return the p quantile of the Gamma posterior (using :func:`scipy.stats.gdtrix`). 55 | 56 | - Used only by :class:`BayesUCB` and :class:`AdBandits` so far. 57 | """ 58 | return gdtrix(self.k, 1. 
/ self.lmbda, p) 59 | 60 | def mean(self): 61 | """Compute the mean of the Gamma posterior (should be useless).""" 62 | return self.k / float(self.lmbda) 63 | 64 | def forget(self, obs): 65 | """Forget the last observation.""" 66 | # print("Info: calling Gamma.forget() with obs = {} ...".format(obs)) # DEBUG 67 | self.k += self._k 68 | self.lmbda += obs 69 | 70 | def update(self, obs): 71 | """Add an observation: increase k by k0, and lmbda by obs (do not have to be normalized).""" 72 | # print("Info: calling Gamma.update() with obs = {} ...".format(obs)) # DEBUG 73 | self.k += self._k 74 | self.lmbda += obs 75 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/Posterior.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Base class for a posterior. Cf. http://chercheurs.lille.inria.fr/ekaufman/NIPS13 Fig.1 for a list of posteriors. """ 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.6" 7 | 8 | 9 | class Posterior(object): 10 | """ Manipulate posteriors experiments.""" 11 | 12 | def __init__(self, *args, **kwargs): 13 | raise NotImplementedError("This method __init__(self, *args, **kwargs) has to be implemented in the child class inheriting from Posterior.") 14 | 15 | def reset(self, *args, **kwargs): 16 | """Reset posterior, new experiment.""" 17 | raise NotImplementedError("This method reset(self, *args, **kwargs) has to be implemented in the child class inheriting from Posterior.") 18 | 19 | def sample(self): 20 | """Sample from the posterior.""" 21 | raise NotImplementedError("This method sample(self) has to be implemented in the child class inheriting from Posterior.") 22 | 23 | def quantile(self, p): 24 | """p quantile from the posterior.""" 25 | raise NotImplementedError("This method quantile(self, p) has to be implemented in the child class inheriting from Posterior.") 26 | 27 | def mean(self): 28 | """Mean of the posterior.""" 29 | raise NotImplementedError("This method mean(self) has to be implemented in the child class inheriting from Posterior.") 30 | 31 | def forget(self, obs): 32 | """Forget last observation (never used).""" 33 | raise NotImplementedError("This method forget(self, obs) has to be implemented in the child class inheriting from Posterior.") 34 | 35 | def update(self, obs): 36 | """Update posterior with this observation.""" 37 | raise NotImplementedError("This method update(self, obs) has to be implemented in the child class inheriting from Posterior.") 38 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/README.md: -------------------------------------------------------------------------------- 1 | # [Posteriors for Bayesian Index policies:](https://smpybandits.github.io/docs/Policies.Posterior.html) 2 | > See here the documentation: [docs/Policies.Posterior](https://smpybandits.github.io/docs/Policies.Posterior.html) 3 | 4 | 5 | - [`Beta`](Beta.py) is the default for [`Thompson`](Thompson.py) Sampling and [`BayesUCB`](BayesUCB.py), ideal for Bernoulli experiments, 6 | - [`Gamma`](Gamma.py) and [`Gauss`](Gauss.py) are more suited for respectively Poisson and Gaussian arms. 
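A minimal usage sketch (illustrative only, based on the `Gamma` class above; it assumes the package is importable as `SMPyBandits`, e.g. installed or run from the repository root):

```python
from SMPyBandits.Policies.Posterior import Gamma

posterior = Gamma(k=1, lmbda=1)   # Gamma(k=1, lambda=1) prior
for obs in (0.5, 1.2, 0.8):       # non-negative observed rewards
    posterior.update(obs)         # k += 1 and lambda += obs (conjugate update)
print(posterior.mean())           # posterior mean, k / lambda
print(posterior.sample())         # one sample, as used by Thompson sampling
print(posterior.quantile(0.95))   # upper quantile, as used by BayesUCB
```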
-------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Posteriors for Bayesian Index policies: 3 | 4 | - :class:`Beta` is the default for :class:`Thompson` Sampling and :class:`BayesUCB`, ideal for Bernoulli experiments, 5 | - :class:`Gamma` and :class:`Gauss` are more suited for respectively Poisson and Gaussian arms, 6 | - :class:`DiscountedBeta` is the default for :class:`Policies.DiscountedThompson` Sampling, ideal for Bernoulli experiments on non stationary bandits. 7 | """ 8 | from __future__ import division, print_function # Python 2 compatibility 9 | 10 | __author__ = "Lilian Besson" 11 | __version__ = "0.9" 12 | 13 | # from .Posterior import Posterior 14 | 15 | from .Beta import Beta 16 | from .DiscountedBeta import DiscountedBeta 17 | from .Gamma import Gamma 18 | from .Gauss import Gauss 19 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/with_proba.py: -------------------------------------------------------------------------------- 1 | ../with_proba.py -------------------------------------------------------------------------------- /SMPyBandits/Policies/RCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The RCB, Randomized Confidence Bound, policy for bounded bandits. 3 | 4 | - Reference: [["On the Optimality of Perturbations in Stochastic and Adversarial Multi-armed Bandit Problems", by Baekjin Kim, Ambuj Tewari, arXiv:1902.00610]](https://arxiv.org/pdf/1902.00610.pdf) 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.9" 10 | 11 | try: 12 | from .RandomizedIndexPolicy import RandomizedIndexPolicy 13 | from .UCBalpha import UCBalpha 14 | except ImportError: 15 | from RandomizedIndexPolicy import RandomizedIndexPolicy 16 | from UCBalpha import UCBalpha 17 | 18 | 19 | class RCB(RandomizedIndexPolicy, UCBalpha): 20 | """ The RCB, Randomized Confidence Bound, policy for bounded bandits. 21 | 22 | - Reference: [["On the Optimality of Perturbations in Stochastic and Adversarial Multi-armed Bandit Problems", by Baekjin Kim, Ambuj Tewari, arXiv:1902.00610]](https://arxiv.org/pdf/1902.00610.pdf) 23 | """ 24 | # FIXME I should implement these RandomizedIndexPolicy variants in a more generic way! 25 | pass 26 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/RH_UCB_Temp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function # Python 2 compatibility 3 | 4 | __author__ = "SlyJabiru" 5 | __version__ = "0.1" 6 | 7 | import random 8 | from math import sqrt, log 9 | import numpy as np 10 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 
11 | 12 | 13 | try: 14 | from .StrategicIndexPolicy import StrategicIndexPolicy 15 | except ImportError: 16 | from StrategicIndexPolicy import StrategicIndexPolicy 17 | 18 | 19 | class RH_UCB_Temp(StrategicIndexPolicy): 20 | def __init__(self, nbArms, nbAgents, nbArmsPerAgents, 21 | lower=0., amplitude=1.): 22 | super(RH_UCB_Temp, self).__init__(nbArms, nbAgents, nbArmsPerAgents, 23 | lower=lower, amplitude=amplitude) 24 | 25 | def computeAgentIndex(self, agent): 26 | if self.agentPulls[agent] < 1: 27 | return float('+inf') 28 | else: 29 | return (self.agentRewards[agent] / self.agentPulls[agent]) + sqrt(sqrt(self.t) * log(self.t) / self.agentPulls[agent]) 30 | 31 | def computeArmIndex(self, arm): 32 | if self.armPulls[arm] < 1: 33 | return float('+inf') 34 | else: 35 | armPossession = np.cumsum(self.nbArmsPerAgents) - 1 36 | temp = (armPossession >= arm) 37 | agent = np.where(temp)[0][0] 38 | return (self.armRewards[arm] / self.armPulls[arm]) + sqrt((2 * log(self.agentPulls[agent])) / self.armPulls[arm]) 39 | 40 | def computeAllIndex(self): 41 | """ Compute the current indices for all agent and all arms, in a vectorized manner.""" 42 | agentIndices = (self.agentRewards / self.agentPulls) + np.sqrt(np.sqrt(self.t) * np.log(self.t) / self.agentPulls) 43 | 44 | agentPullsRepeated = np.repeat(self.agentPulls, self.nbArmsPerAgents) 45 | armIndices = (self.armRewards / self.armPulls) + np.sqrt((2 * np.log(agentPullsRepeated)) / self.armPulls) 46 | 47 | agentIndices[self.agentPulls < 1] = float('+inf') 48 | armIndices[self.armPulls < 1] = float('+inf') 49 | 50 | self.agentIndex[:] = agentIndices 51 | self.armIndex[:] = armIndices 52 | 53 | 54 | # --- Debugging 55 | 56 | # if __name__ == "__main__": 57 | # # Code for debugging purposes. 58 | # from doctest import testmod 59 | # print("\nTesting automatically all the docstring written in each functions of this module :") 60 | # testmod(verbose=True) 61 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/StrategicUCB2PhaseRobustDeprecated.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function # Python 2 compatibility 3 | 4 | __author__ = "SlyJabiru" 5 | __version__ = "0.1" 6 | 7 | 8 | from math import sqrt, log 9 | import numpy as np 10 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 
11 | 12 | 13 | try: 14 | from .StrategicIndexPolicy import StrategicIndexPolicy 15 | except ImportError: 16 | from StrategicIndexPolicy import StrategicIndexPolicy 17 | 18 | 19 | class StrategicUCB2PhaseRobustDeprecated(StrategicIndexPolicy): 20 | def computeAgentIndex(self, agent): 21 | if self.agentPulls[agent] < 1: 22 | return float('+inf') 23 | else: 24 | return (self.agentRewards[agent] / self.agentPulls[agent]) + sqrt(sqrt(self.t * log(self.t)) / self.agentPulls[agent]) 25 | 26 | def computeArmIndex(self, arm): 27 | if self.armPulls[arm] < 1: 28 | return float('+inf') 29 | else: 30 | armPossession = np.cumsum(self.nbArmsPerAgents) - 1 31 | temp = (armPossession >= arm) 32 | agent = np.where(temp)[0][0] 33 | return (self.armRewards[arm] / self.armPulls[arm]) + sqrt((2 * log(self.agentPulls[agent])) / self.armPulls[arm]) 34 | 35 | def computeAllIndex(self): 36 | """ Compute the current indices for all agent and all arms, in a vectorized manner.""" 37 | agentIndices = (self.agentRewards / self.agentPulls) + np.sqrt(np.sqrt(self.t * np.log(self.t)) / self.agentPulls) 38 | 39 | agentPullsRepeated = np.repeat(self.agentPulls, self.nbArmsPerAgents) 40 | armIndices = (self.armRewards / self.armPulls) + np.sqrt((2 * np.log(agentPullsRepeated)) / self.armPulls) 41 | 42 | agentIndices[self.agentPulls < 1] = float('+inf') 43 | armIndices[self.armPulls < 1] = float('+inf') 44 | 45 | self.agentIndex[:] = agentIndices 46 | self.armIndex[:] = armIndices 47 | 48 | 49 | # --- Debugging 50 | 51 | # if __name__ == "__main__": 52 | # # Code for debugging purposes. 53 | # from doctest import testmod 54 | # print("\nTesting automatically all the docstring written in each functions of this module :") 55 | # testmod(verbose=True) 56 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/SuccessiveElimination.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Generic policy based on successive elimination, mostly useless except to maintain a clear hierarchy of inheritance. 3 | """ 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.9" 7 | 8 | from numpy import sqrt, log 9 | import numpy as np 10 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 11 | 12 | try: 13 | from .IndexPolicy import IndexPolicy 14 | except ImportError: 15 | from IndexPolicy import IndexPolicy 16 | 17 | 18 | class SuccessiveElimination(IndexPolicy): 19 | """ Generic policy based on successive elimination, mostly useless except to maintain a clear hierarchy of inheritance. 20 | """ 21 | 22 | def choice(self): 23 | r""" In policy based on successive elimination, choosing an arm is the same as choosing an arm from the set of active arms (``self.activeArms``) with method ``choiceFromSubSet``. 24 | """ 25 | return self.choiceFromSubSet(self.activeArms) 26 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/TakeFixedArm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ TakeFixedArm: always select a fixed arm. 3 | This is the perfect static policy if armIndex = bestArmIndex (not realistic, for test only). 
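Example (a tiny illustrative sketch)::

    >>> policy = TakeFixedArm(10, armIndex=3)
    >>> policy.choice()
    3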
4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.9" 9 | 10 | try: 11 | from .BasePolicy import BasePolicy 12 | except ImportError: 13 | from BasePolicy import BasePolicy 14 | 15 | 16 | class TakeFixedArm(BasePolicy): 17 | """ TakeFixedArm: always select a fixed arm. 18 | This is the perfect static policy if armIndex = bestArmIndex (not realistic, for test only). 19 | """ 20 | 21 | def __init__(self, nbArms, armIndex=None, lower=0., amplitude=1.): 22 | self.nbArms = nbArms #: Number of arms 23 | if armIndex is None: 24 | armIndex = 0 25 | self.armIndex = armIndex #: Fixed arm 26 | 27 | def __str__(self): 28 | return "TakeFixedArm({})".format(self.armIndex) 29 | 30 | def startGame(self): 31 | """Nothing to do.""" 32 | pass 33 | 34 | def getReward(self, arm, reward): 35 | """Nothing to do.""" 36 | pass 37 | 38 | def choice(self): 39 | """Always the same choice.""" 40 | return self.armIndex 41 | 42 | def choiceWithRank(self, rank=1): 43 | """ Ignore the rank.""" 44 | return self.choice() 45 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/TakeRandomFixedArm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ TakeRandomFixedArm: always select a fixed arm. 3 | This is the perfect static policy if armIndex = bestArmIndex (not realistic, for test only). 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.9" 9 | 10 | import numpy as np 11 | import numpy.random as rn 12 | 13 | try: 14 | from .TakeFixedArm import TakeFixedArm 15 | except ImportError: 16 | from TakeFixedArm import TakeFixedArm 17 | 18 | 19 | class TakeRandomFixedArm(TakeFixedArm): 20 | """ TakeRandomFixedArm: first selects a random sub-set of arms, then always select from it. """ 21 | 22 | def __init__(self, nbArms, lower=0., amplitude=1., nbArmIndexes=None): 23 | self.nbArms = nbArms #: Number of arms 24 | #: Get the number of arms, randomly! 25 | if nbArmIndexes is None: 26 | nbArmIndexes = rn.randint(low=1, high=1 + int(nbArms / 2.)) 27 | #: Fix the set of arms 28 | self.armIndexes = list(rn.choice(np.arange(nbArms), size=nbArmIndexes, replace=False)) 29 | 30 | def __str__(self): 31 | return "TakeRandomFixedArm({})".format(self.armIndexes) 32 | 33 | def choice(self): 34 | """Uniform choice from armIndexes.""" 35 | return rn.choice(self.armIndexes) 36 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Thompson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Thompson (Bayesian) index policy. 3 | 4 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 5 | - Reference: [Thompson - Biometrika, 1933]. 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Olivier Cappé, Aurélien Garivier, Emilie Kaufmann, Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | try: 13 | from .BayesianIndexPolicy import BayesianIndexPolicy 14 | except (ImportError, SystemError): 15 | from BayesianIndexPolicy import BayesianIndexPolicy 16 | 17 | 18 | class Thompson(BayesianIndexPolicy): 19 | r"""The Thompson (Bayesian) index policy. 20 | 21 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 
22 | - Prior is initially flat, i.e., :math:`a=\alpha_0=1` and :math:`b=\beta_0=1`. 23 | 24 | - A non-flat prior for each arm can be given with parameters ``a`` and ``b``, for instance:: 25 | 26 | nbArms = 2 27 | prior_failures = a = 100 28 | prior_successes = b = 50 29 | policy = Thompson(nbArms, a=a, b=b) 30 | np.mean([policy.choice() for _ in range(1000)]) # 0.515 ~= 0.5: each arm has same prior! 31 | 32 | - A different prior for each arm can be given with parameters ``params_for_each_posterior``, for instance:: 33 | 34 | nbArms = 2 35 | params0 = { 'a': 10, 'b': 5} # mean 1/3 36 | params1 = { 'a': 5, 'b': 10} # mean 2/3 37 | params = [params0, params1] 38 | policy = Thompson(nbArms, params_for_each_posterior=params) 39 | np.mean([policy.choice() for _ in range(1000)]) # 0.9719 ~= 1: arm 1 is better than arm 0 ! 40 | 41 | - Reference: [Thompson - Biometrika, 1933]. 42 | """ 43 | 44 | def __str__(self): 45 | return "Thompson Sampling" 46 | 47 | def computeIndex(self, arm): 48 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, giving :math:`S_k(t)` rewards of 1, by sampling from the Beta posterior: 49 | 50 | .. math:: 51 | A(t) &\sim U(\arg\max_{1 \leq k \leq K} I_k(t)),\\ 52 | I_k(t) &\sim \mathrm{Beta}(1 + \tilde{S_k}(t), 1 + \tilde{N_k}(t) - \tilde{S_k}(t)). 53 | """ 54 | return self.posterior[arm].sample() 55 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB policy for bounded bandits. 3 | 4 | - Reference: [Lai & Robbins, 1985]. 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.1" 10 | 11 | from math import sqrt, log 12 | import numpy as np 13 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 14 | 15 | try: 16 | from .IndexPolicy import IndexPolicy 17 | except ImportError: 18 | from IndexPolicy import IndexPolicy 19 | 20 | 21 | class UCB(IndexPolicy): 22 | """ The UCB policy for bounded bandits. 23 | 24 | - Reference: [Lai & Robbins, 1985]. 25 | """ 26 | 27 | def computeIndex(self, arm): 28 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 29 | 30 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{2 \log(t)}{N_k(t)}}. 31 | """ 32 | if self.pulls[arm] < 1: 33 | return float('+inf') 34 | else: 35 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((2 * log(self.t)) / self.pulls[arm]) 36 | 37 | def computeAllIndex(self): 38 | """ Compute the current indexes for all arms, in a vectorized manner.""" 39 | indexes = (self.rewards / self.pulls) + np.sqrt((2 * np.log(self.t)) / self.pulls) 40 | indexes[self.pulls < 1] = float('+inf') 41 | self.index[:] = indexes 42 | 43 | 44 | # --- Debugging 45 | 46 | if __name__ == "__main__": 47 | # Code for debugging purposes. 48 | from doctest import testmod 49 | print("\nTesting automatically all the docstring written in each functions of this module :") 50 | testmod(verbose=True) 51 | 52 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBH.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB-H policy for bounded bandits, with knowing the horizon. 3 | Reference: [Audibert et al. 09]. 
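For instance (illustrative numbers only): with a known horizon :math:`T = 1000`, :math:`\alpha = 4`, an arm pulled :math:`N_k(t) = 20` times with cumulated reward :math:`X_k(t) = 10` gets the index :math:`0.5 + \sqrt{4 \log(1000) / (2 \times 20)} \approx 1.33`, which no longer depends on the current time :math:`t`.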
4 | """ 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.6" 8 | 9 | from numpy import sqrt, log 10 | import numpy as np 11 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 12 | 13 | try: 14 | from .UCBalpha import UCBalpha, ALPHA 15 | except ImportError: 16 | from UCBalpha import UCBalpha, ALPHA 17 | 18 | 19 | class UCBH(UCBalpha): 20 | """ The UCB-H policy for bounded bandits, with knowing the horizon. 21 | Reference: [Audibert et al. 09]. 22 | """ 23 | 24 | def __init__(self, nbArms, horizon=None, alpha=ALPHA, lower=0., amplitude=1.): 25 | super(UCBH, self).__init__(nbArms, lower=lower, amplitude=amplitude) 26 | self.horizon = int(horizon) #: Parameter :math:`T` = known horizon of the experiment. 27 | self.alpha = alpha #: Parameter alpha 28 | 29 | def __str__(self): 30 | return r"UCB-H($T={}$, $\alpha={:.3g}$)".format(self.horizon, self.alpha) 31 | 32 | def computeIndex(self, arm): 33 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 34 | 35 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{\alpha \log(T)}{2 N_k(t)}}. 36 | """ 37 | if self.pulls[arm] < 1: 38 | return float('+inf') 39 | else: 40 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((self.alpha * log(self.horizon)) / (2 * self.pulls[arm])) 41 | 42 | def computeAllIndex(self): 43 | """ Compute the current indexes for all arms, in a vectorized manner.""" 44 | indexes = (self.rewards / self.pulls) + np.sqrt((self.alpha * np.log(self.horizon)) / (2 * self.pulls)) 45 | indexes[self.pulls < 1] = float('+inf') 46 | self.index[:] = indexes 47 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBV.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB-V policy for bounded bandits, with a variance correction term. 3 | Reference: [Audibert, Munos, & Szepesvári - Theoret. Comput. Sci., 2009]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Olivier Cappé, Aurélien Garivier, Lilian Besson" 8 | __version__ = "0.5" 9 | 10 | from math import sqrt, log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .UCB import UCB 16 | except ImportError: 17 | from UCB import UCB 18 | 19 | 20 | class UCBV(UCB): 21 | """ The UCB-V policy for bounded bandits, with a variance correction term. 22 | Reference: [Audibert, Munos, & Szepesvári - Theoret. Comput. Sci., 2009]. 23 | """ 24 | def __str__(self): 25 | return "UCB-V" 26 | 27 | def __init__(self, nbArms, lower=0., amplitude=1.): 28 | super(UCBV, self).__init__(nbArms, lower=lower, amplitude=amplitude) 29 | self.rewardsSquared = np.zeros(self.nbArms) #: Keep track of squared of rewards, to compute an empirical variance 30 | 31 | def startGame(self): 32 | super(UCBV, self).startGame() 33 | self.rewardsSquared.fill(0) 34 | 35 | def getReward(self, arm, reward): 36 | """Give a reward: increase t, pulls, and update cumulated sum of rewards and of rewards squared for that arm (normalized in [0, 1]).""" 37 | super(UCBV, self).getReward(arm, reward) 38 | self.rewardsSquared[arm] += ((reward - self.lower) / self.amplitude) ** 2 39 | 40 | def computeIndex(self, arm): 41 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 42 | 43 | .. 
math:: 44 | 45 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 46 | V_k(t) &= \frac{Z_k(t)}{N_k(t)} - \hat{\mu}_k(t)^2, \\ 47 | I_k(t) &= \hat{\mu}_k(t) + \sqrt{\frac{2 \log(t) V_k(t)}{N_k(t)}} + 3 (b - a) \frac{\log(t)}{N_k(t)}. 48 | 49 | Where rewards are in :math:`[a, b]`, and :math:`V_k(t)` is an estimator of the variance of rewards, 50 | obtained from :math:`X_k(t) = \sum_{\sigma=1}^{t} 1(A(\sigma) = k) r_k(\sigma)` is the sum of rewards from arm k, 51 | and :math:`Z_k(t) = \sum_{\sigma=1}^{t} 1(A(\sigma) = k) r_k(\sigma)^2` is the sum of rewards *squared*. 52 | """ 53 | if self.pulls[arm] < 1: 54 | return float('+inf') 55 | else: 56 | mean = self.rewards[arm] / self.pulls[arm] # Mean estimate 57 | variance = (self.rewardsSquared[arm] / self.pulls[arm]) - mean ** 2 # Variance estimate 58 | return mean + sqrt(2.0 * log(self.t) * variance / self.pulls[arm]) + 3.0 * self.amplitude * log(self.t) / self.pulls[arm] 59 | 60 | def computeAllIndex(self): 61 | """ Compute the current indexes for all arms, in a vectorized manner.""" 62 | means = self.rewards / self.pulls # Mean estimate 63 | variances = (self.rewardsSquared / self.pulls) - means ** 2 # Variance estimate 64 | indexes = means + np.sqrt(2.0 * np.log(self.t) * variances / self.pulls) + 3.0 * self.amplitude * np.log(self.t) / self.pulls 65 | indexes[self.pulls < 1] = float('+inf') 66 | self.index[:] = indexes 67 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBVtuned.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCBV-Tuned policy for bounded bandits, with a tuned variance correction term. 3 | Reference: [Auer et al. 02]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Olivier Cappé, Aurélien Garivier, Lilian Besson" 8 | __version__ = "0.5" 9 | 10 | from math import sqrt, log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .UCBV import UCBV 16 | except ImportError: 17 | from UCBV import UCBV 18 | 19 | 20 | class UCBVtuned(UCBV): 21 | """ The UCBV-Tuned policy for bounded bandits, with a tuned variance correction term. 22 | Reference: [Auer et al. 02]. 23 | """ 24 | def __str__(self): 25 | return "UCB-V-Tuned" 26 | 27 | def computeIndex(self, arm): 28 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 29 | 30 | .. math:: 31 | 32 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 33 | V_k(t) &= \frac{Z_k(t)}{N_k(t)} - \hat{\mu}_k(t)^2, \\ 34 | V'_k(t) &= V_k(t) + \sqrt{\frac{2 \log(t)}{N_k(t)}}, \\ 35 | I_k(t) &= \hat{\mu}_k(t) + \sqrt{\frac{\log(t) V'_k(t)}{N_k(t)}}. 36 | 37 | Where :math:`V'_k(t)` is an other estimator of the variance of rewards, 38 | obtained from :math:`X_k(t) = \sum_{\sigma=1}^{t} 1(A(\sigma) = k) r_k(\sigma)` is the sum of rewards from arm k, 39 | and :math:`Z_k(t) = \sum_{\sigma=1}^{t} 1(A(\sigma) = k) r_k(\sigma)^2` is the sum of rewards *squared*. 
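For instance (illustrative numbers only): with :math:`t = 100`, :math:`N_k(t) = 10`, :math:`X_k(t) = 6` and :math:`Z_k(t) = 4`, one gets :math:`\hat{\mu}_k(t) = 0.6`, :math:`V_k(t) = 0.4 - 0.36 = 0.04`, :math:`V'_k(t) \approx 0.04 + 0.96 \approx 1.0`, and :math:`I_k(t) \approx 0.6 + \sqrt{\log(100) \times 1.0 / 10} \approx 1.28`.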
40 | """ 41 | if self.pulls[arm] < 1: 42 | return float('+inf') 43 | else: 44 | mean = self.rewards[arm] / self.pulls[arm] # Mean estimate 45 | variance = (self.rewardsSquared[arm] / self.pulls[arm]) - mean ** 2 # Variance estimate 46 | # Correct variance estimate 47 | variance += sqrt(2.0 * log(self.t) / self.pulls[arm]) 48 | return mean + sqrt(log(self.t) * variance / self.pulls[arm]) 49 | 50 | def computeAllIndex(self): 51 | """ Compute the current indexes for all arms, in a vectorized manner.""" 52 | means = self.rewards / self.pulls # Mean estimate 53 | variances = (self.rewardsSquared / self.pulls) - means ** 2 # Variance estimate 54 | variances += np.sqrt(2.0 * np.log(self.t) / self.pulls) 55 | indexes = means + np.sqrt(np.log(self.t) * variances / self.pulls) 56 | indexes[self.pulls < 1] = float('+inf') 57 | self.index[:] = indexes 58 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBalpha.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB1 (UCB-alpha) index policy, modified to take a random permutation order for the initial exploration of each arm (reduce collisions in the multi-players setting). 3 | Reference: [Auer et al. 02]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.9" 9 | 10 | from math import sqrt, log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .UCB import UCB 16 | except ImportError: 17 | from UCB import UCB 18 | 19 | #: Default parameter for alpha 20 | ALPHA = 1 21 | ALPHA = 4 22 | 23 | 24 | class UCBalpha(UCB): 25 | """ The UCB1 (UCB-alpha) index policy, modified to take a random permutation order for the initial exploration of each arm (reduce collisions in the multi-players setting). 26 | Reference: [Auer et al. 02]. 27 | """ 28 | 29 | def __init__(self, nbArms, alpha=ALPHA, lower=0., amplitude=1.): 30 | super(UCBalpha, self).__init__(nbArms, lower=lower, amplitude=amplitude) 31 | assert alpha >= 0, "Error: the alpha parameter for UCBalpha class has to be >= 0." # DEBUG 32 | self.alpha = alpha #: Parameter alpha 33 | 34 | def __str__(self): 35 | return r"UCB($\alpha={:.3g}$)".format(self.alpha) 36 | 37 | def computeIndex(self, arm): 38 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 39 | 40 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{\alpha \log(t)}{2 N_k(t)}}. 41 | """ 42 | if self.pulls[arm] < 1: 43 | return float('+inf') 44 | else: 45 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((self.alpha * log(self.t)) / (2 * self.pulls[arm])) 46 | 47 | def computeAllIndex(self): 48 | """ Compute the current indexes for all arms, in a vectorized manner.""" 49 | indexes = (self.rewards / self.pulls) + np.sqrt((self.alpha * np.log(self.t)) / (2 * self.pulls)) 50 | indexes[self.pulls < 1] = float('+inf') 51 | self.index[:] = indexes 52 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBmin.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCB-min policy for bounded bandits, with a :math:`\min\left(1, \sqrt{\frac{\log(t)}{2 N_k(t)}}\right)` term. 3 | Reference: [Anandkumar et al., 2010]. 
4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import sqrt, log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .UCB import UCB 16 | except ImportError: 17 | from UCB import UCB 18 | 19 | 20 | class UCBmin(UCB): 21 | r""" The UCB-min policy for bounded bandits, with a :math:`\min\left(1, \sqrt{\frac{\log(t)}{2 N_k(t)}}\right)` term. 22 | Reference: [Anandkumar et al., 2010]. 23 | """ 24 | 25 | def computeIndex(self, arm): 26 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 27 | 28 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \min\left(1, \sqrt{\frac{\log(t)}{2 N_k(t)}}\right). 29 | """ 30 | if self.pulls[arm] < 1: 31 | return float('+inf') 32 | else: 33 | return (self.rewards[arm] / self.pulls[arm]) + min(1., sqrt(log(self.t) / (2 * self.pulls[arm]))) 34 | 35 | def computeAllIndex(self): 36 | """ Compute the current indexes for all arms, in a vectorized manner.""" 37 | indexes = (self.rewards / self.pulls) + np.minimum(1., np.sqrt((2 * np.log10(self.t)) / self.pulls)) 38 | indexes[self.pulls < 1] = float('+inf') 39 | self.index[:] = indexes 40 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBplus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCB+ policy for bounded bandits, with a small trick on the index. 3 | 4 | - Reference: [Auer et al. 2002], and [[Garivier et al. 2016](https://arxiv.org/pdf/1605.08988.pdf)] (it is noted :math:`\mathrm{UCB}^*` in the second article). 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.9" 10 | 11 | from math import sqrt, log 12 | import numpy as np 13 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 14 | 15 | try: 16 | from .UCB import UCB 17 | except ImportError: 18 | from UCB import UCB 19 | 20 | 21 | class UCBplus(UCB): 22 | r""" The UCB+ policy for bounded bandits, with a small trick on the index. 23 | 24 | - Reference: [Auer et al. 2002], and [[Garivier et al. 2016](https://arxiv.org/pdf/1605.08988.pdf)] (it is noted :math:`\mathrm{UCB}^*` in the second article). 25 | """ 26 | 27 | def __str__(self): 28 | return "UCB+" 29 | 30 | def computeIndex(self, arm): 31 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 32 | 33 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\max\left(0, \frac{\log(t / N_k(t))}{2 N_k(t)}\right)}. 
34 | """ 35 | if self.pulls[arm] < 1: 36 | return float('+inf') 37 | else: 38 | return (self.rewards[arm] / self.pulls[arm]) + sqrt(max(0., log(self.t / (self.pulls[arm]))) / (2 * self.pulls[arm])) 39 | 40 | def computeAllIndex(self): 41 | """ Compute the current indexes for all arms, in a vectorized manner.""" 42 | indexes = (self.rewards / self.pulls) + np.sqrt(np.maximum(0., np.log(self.t / self.pulls)) / (2 * self.pulls)) # vectorized version of computeIndex, with the same log(t / N_k(t)) exploration term 43 | indexes[self.pulls < 1] = float('+inf') 44 | self.index[:] = indexes 45 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBrandomInit.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB index policy, modified to take a random permutation order for the initial exploration of each arm (could reduce collisions in the multi-players setting). 3 | Reference: [Lai & Robbins, 1985]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.2" 9 | 10 | import numpy as np 11 | 12 | try: 13 | from .UCB import UCB 14 | except ImportError: 15 | from UCB import UCB 16 | 17 | 18 | class UCBrandomInit(UCB): 19 | """ The UCB index policy, modified to take a random permutation order for the initial exploration of each arm (could reduce collisions in the multi-players setting). 20 | Reference: [Lai & Robbins, 1985]. 21 | """ 22 | 23 | def __init__(self, nbArms, lower=0., amplitude=1.): 24 | super(UCBrandomInit, self).__init__(nbArms, lower=lower, amplitude=amplitude) 25 | # Randomize the order of the initial visit to each arm: a deterministic initial order breaks the ability to play efficiently in multi-players games 26 | self._initial_exploration = np.random.permutation(nbArms) 27 | # The proba that another player has the same is nbPlayers / factorial(nbArms) : should be SMALL ! 28 | # print("One UCB player with _initial_exploration =", self._initial_exploration) # DEBUG 29 | 30 | def choice(self): 31 | if self.t < self.nbArms: # Force to first visit each arm in a certain random order 32 | return self._initial_exploration[self.t] # Better: random permutation! 33 | else: 34 | return super(UCBrandomInit, self).choice() 35 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Uniform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Uniform: the fully uniform policy who selects randomly (uniformly) an arm at each step (stupid). 3 | """ 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.1" 7 | 8 | import random 9 | 10 | try: 11 | from .BasePolicy import BasePolicy 12 | except ImportError: 13 | from BasePolicy import BasePolicy 14 | 15 | 16 | class Uniform(BasePolicy): 17 | """ Uniform: the fully uniform policy who selects randomly (uniformly) an arm at each step (stupid).
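    Example (a small illustrative check):

    >>> policy = Uniform(nbArms=5)
    >>> policy.startGame()
    >>> 0 <= policy.choice() <= 4
    True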
18 | """ 19 | 20 | def __init__(self, nbArms, lower=0., amplitude=1.): 21 | """Nothing to do.""" 22 | self.nbArms = nbArms #: Number of arms 23 | 24 | def __str__(self): 25 | return "U(1..{})".format(self.nbArms) 26 | 27 | def startGame(self): 28 | """Nothing to do.""" 29 | pass 30 | 31 | def getReward(self, arm, reward): 32 | """Nothing to do.""" 33 | pass 34 | 35 | def choice(self): 36 | """Uniform random choice between 0 and nbArms - 1 (included).""" 37 | return random.randint(0, self.nbArms - 1) 38 | 39 | def choiceWithRank(self, rank=1): 40 | """Ignore the rank!""" 41 | return self.choice() 42 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UniformOnSome.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ UniformOnSome: a fully uniform policy who selects randomly (uniformly) an arm among a fix set, at each step (stupid). 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.1" 8 | 9 | import random 10 | 11 | try: 12 | from .Uniform import Uniform 13 | except ImportError: 14 | from Uniform import Uniform 15 | 16 | 17 | class UniformOnSome(Uniform): 18 | """ UniformOnSome: a fully uniform policy who selects randomly (uniformly) an arm among a fix set, at each step (stupid). 19 | """ 20 | 21 | def __init__(self, nbArms, armIndexes=None, lower=0., amplitude=1.): 22 | self.nbArms = nbArms #: Number of arms 23 | if armIndexes is None: 24 | armIndexes = list(range(nbArms)) 25 | self.armIndexes = armIndexes #: Arms from where to uniformly sample 26 | 27 | def __str__(self): 28 | return "UniformOnSome({})".format(self.armIndexes) 29 | 30 | def choice(self): 31 | """Uniform choice from armIndexes.""" 32 | return random.choice(self.armIndexes) 33 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/_test_for_BESA_core_function.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Test of the core function of BESA algorithm. 3 | 4 | $ ipython 5 | In [1]: run _test_for_BESA_core_function.py 6 | 7 | In [2]: %timeit manualbranching(random_samples(a, mu_a, N, 2 * N), random_samples(b, mu_b, N, 2 * N)) 8 | 46.3 µs ± 3.95 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each) 9 | 10 | In [3]: %timeit numpytest(random_samples(a, mu_a, N, 2 * N), random_samples(b, mu_b, N, 2 * N)) 11 | 61.9 µs ± 6.76 µs per loop (mean ± std. dev. 
of 7 runs, 10000 loops each) 12 | """ 13 | from __future__ import division, print_function # Python 2 compatibility 14 | 15 | __author__ = "Lilian Besson" 16 | __version__ = "0.9" 17 | 18 | import numpy as np 19 | import timeit 20 | 21 | 22 | def manualbranching(tuple_a, tuple_b): 23 | Na, mean_a, a = tuple_a 24 | Nb, mean_b, b = tuple_b 25 | if mean_a > mean_b: 26 | return a 27 | elif mean_a < mean_b: 28 | return b 29 | else: 30 | if Na < Nb: 31 | return a 32 | elif Na > Nb: 33 | return b 34 | else: # if no way of breaking the tie, choose uniformly at random 35 | return np.random.choice([a, b]) 36 | 37 | 38 | def numpytest(tuple_a, tuple_b): 39 | Na, mean_a, samples_a, a = tuple_a 40 | Nb, mean_b, samples_b, b = tuple_b 41 | if mean_a != mean_b: 42 | return [a, b][np.argmax([mean_a, mean_b])] 43 | else: 44 | return [a, b][np.argmin([Na, Nb])] 45 | 46 | 47 | def random_samples(i, mu, N1, N2): 48 | N1, N2 = min(N1, N2), max(N1, N2) 49 | N = np.random.randint(N1, high=N2) 50 | samples = np.asarray(np.random.binomial(1, mu, N), dtype=float) 51 | mean = np.mean(samples) 52 | return N, mean, samples, i 53 | 54 | 55 | def main(N=10, mu_a=0.5, mu_b=0.5): 56 | a, b = 0, 1 57 | print("For the function 'manualbranching' run:") 58 | print("%timeit manualbranching(random_samples(a, mu_a, N, 2 * N), random_samples(b, mu_b, N, 2 * N))") 59 | print("For the function 'numpytest' run:") 60 | print("%timeit numpytest(random_samples(a, mu_a, N, 2 * N), random_samples(b, mu_b, N, 2 * N))") 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/klUCBH.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The kl-UCB-H policy, for one-parameter exponential distributions. 3 | Reference: [Lai 87](https://projecteuclid.org/download/pdf_1/euclid.aos/1176350495) 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .kullback import klucbBern 16 | from .klUCB import klUCB, c 17 | except ImportError: 18 | from kullback import klucbBern 19 | from klUCB import klUCB, c 20 | 21 | 22 | class klUCBH(klUCB): 23 | """ The kl-UCB-H policy, for one-parameter exponential distributions. 24 | Reference: [Lai 87](https://projecteuclid.org/download/pdf_1/euclid.aos/1176350495) 25 | """ 26 | 27 | def __init__(self, nbArms, horizon=None, tolerance=1e-4, klucb=klucbBern, c=c, lower=0., amplitude=1.): 28 | super(klUCBH, self).__init__(nbArms, tolerance=tolerance, klucb=klucb, c=c, lower=lower, amplitude=amplitude) 29 | self.horizon = int(horizon) #: Parameter :math:`T` = known horizon of the experiment. 30 | 31 | def __str__(self): 32 | return r"kl-UCB-H($T={}$, {}{})".format(self.horizon, "" if self.c == 1 else r"$c={:.3g}$".format(self.c), self.klucb.__name__[5:]) 33 | 34 | def computeIndex(self, arm): 35 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 36 | 37 | .. math:: 38 | 39 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 40 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{c \log(T)}{N_k(t)} \right\},\\ 41 | I_k(t) &= U_k(t). 
42 | 43 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 44 | and c is the parameter (default to 1). 45 | """ 46 | if self.pulls[arm] < 1: 47 | return float('+inf') 48 | else: 49 | # XXX We could adapt tolerance to the value of self.t 50 | return self.klucb(self.rewards[arm] / self.pulls[arm], self.c * log(self.horizon) / self.pulls[arm], self.tolerance) 51 | 52 | def computeAllIndex(self): 53 | """ Compute the current indexes for all arms, in a vectorized manner.""" 54 | indexes = self.klucb_vect(self.rewards / self.pulls, self.c * np.log(self.horizon) / self.pulls, self.tolerance) 55 | indexes[self.pulls < 1] = float('+inf') 56 | self.index[:] = indexes 57 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/klUCBHPlus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The improved kl-UCB-H+ policy, for one-parameter exponential distributions. 3 | Reference: [Lai 87](https://projecteuclid.org/download/pdf_1/euclid.aos/1176350495) 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .kullback import klucbBern 16 | from .klUCB import klUCB, c 17 | except ImportError: 18 | from kullback import klucbBern 19 | from klUCB import klUCB, c 20 | 21 | 22 | class klUCBHPlus(klUCB): 23 | """ The improved kl-UCB-H+ policy, for one-parameter exponential distributions. 24 | Reference: [Lai 87](https://projecteuclid.org/download/pdf_1/euclid.aos/1176350495) 25 | """ 26 | 27 | def __init__(self, nbArms, horizon=None, tolerance=1e-4, klucb=klucbBern, c=c, lower=0., amplitude=1.): 28 | super(klUCBHPlus, self).__init__(nbArms, tolerance=tolerance, klucb=klucb, c=c, lower=lower, amplitude=amplitude) 29 | self.horizon = int(horizon) #: Parameter :math:`T` = known horizon of the experiment. 30 | 31 | def __str__(self): 32 | return r"kl-UCB-H+($T={}$, {}{})".format(self.horizon, "" if self.c == 1 else r"$c={:.3g}$".format(self.c), self.klucb.__name__[5:]) 33 | 34 | def computeIndex(self, arm): 35 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 36 | 37 | .. math:: 38 | 39 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 40 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{c \log(T / N_k(t))}{N_k(t)} \right\},\\ 41 | I_k(t) &= U_k(t). 42 | 43 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 44 | and c is the parameter (default to 1). 
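        A small illustrative sketch of how such an upper-confidence value can be computed in practice, assuming the pure-Python helper :func:`Policies.kullback.klucbBern` (a dichotomic search on :math:`q`)::

            from SMPyBandits.Policies.kullback import klucbBern
            mean = 0.3                   # empirical mean of the arm, hat{mu}_k(t)
            budget = 0.2                 # exploration budget, c * log(T / N_k(t)) / N_k(t)
            U = klucbBern(mean, budget)  # largest q in [mean, 1] with kl(mean, q) <= budget
            assert mean < U <= 1.0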
45 | """ 46 | if self.pulls[arm] < 1: 47 | return float('+inf') 48 | else: 49 | # XXX We could adapt tolerance to the value of self.t 50 | return self.klucb(self.rewards[arm] / self.pulls[arm], self.c * log(self.horizon / self.pulls[arm]) / self.pulls[arm], self.tolerance) 51 | 52 | def computeAllIndex(self): 53 | """ Compute the current indexes for all arms, in a vectorized manner.""" 54 | indexes = self.klucb_vect(self.rewards / self.pulls, self.c * np.log(self.horizon / self.pulls) / self.pulls, self.tolerance) 55 | indexes[self.pulls < 1] = float('+inf') 56 | self.index[:] = indexes 57 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/klUCBPlus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The improved kl-UCB policy, for one-parameter exponential distributions. 3 | Reference: [Cappé et al. 13](https://arxiv.org/pdf/1210.1136.pdf) 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .klUCB import klUCB 16 | except ImportError: 17 | from klUCB import klUCB 18 | 19 | 20 | class klUCBPlus(klUCB): 21 | """ The improved kl-UCB policy, for one-parameter exponential distributions. 22 | Reference: [Cappé et al. 13](https://arxiv.org/pdf/1210.1136.pdf) 23 | """ 24 | 25 | def __str__(self): 26 | name = self.klucb.__name__[5:] 27 | if name == "Bern": name = "" 28 | complement = "{}{}".format(name, "" if self.c == 1 else r"$c={:.3g}$".format(self.c)) 29 | if complement != "": complement = "({})".format(complement) 30 | return r"kl-UCB$^+${}".format(complement) 31 | 32 | def computeIndex(self, arm): 33 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 34 | 35 | .. math:: 36 | 37 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 38 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{c \log(t / N_k(t))}{N_k(t)} \right\},\\ 39 | I_k(t) &= U_k(t). 40 | 41 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 42 | and c is the parameter (default to 1). 43 | """ 44 | if self.pulls[arm] < 1: 45 | return float('+inf') 46 | else: 47 | # XXX We could adapt tolerance to the value of self.t 48 | return self.klucb(self.rewards[arm] / self.pulls[arm], self.c * log(self.t / self.pulls[arm]) / self.pulls[arm], self.tolerance) 49 | 50 | def computeAllIndex(self): 51 | """ Compute the current indexes for all arms, in a vectorized manner.""" 52 | indexes = self.klucb_vect(self.rewards / self.pulls, self.c * np.log(self.t / self.pulls) / self.pulls, self.tolerance) 53 | indexes[self.pulls < 1] = float('+inf') 54 | self.index[:] = indexes 55 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/klUCBloglog.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The generic kl-UCB policy for one-parameter exponential distributions. 3 | By default, it assumes Bernoulli arms. 4 | Note: using log(t) + c log(log(t)) for the KL-UCB index of just log(t) 5 | Reference: [Garivier & Cappé - COLT, 2011]. 
6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | from math import log 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | try: 17 | from .klUCB import klUCB 18 | except ImportError: 19 | from klUCB import klUCB 20 | 21 | #: Default value for the constant c used in the computation of KL-UCB index. 22 | c = 3 #: default value, as it was in pymaBandits v1.0 23 | # c = 1 #: as suggested in the Theorem 1 in https://arxiv.org/pdf/1102.2490.pdf 24 | 25 | 26 | class klUCBloglog(klUCB): 27 | """ The generic kl-UCB policy for one-parameter exponential distributions. 28 | By default, it assumes Bernoulli arms. 29 | Note: using log(t) + c log(log(t)) for the KL-UCB index of just log(t) 30 | Reference: [Garivier & Cappé - COLT, 2011]. 31 | """ 32 | 33 | # def __init__(self, nbArms, tolerance=TOLERANCE, klucb=klucbBern, c=c, lower=0., amplitude=1.): 34 | # super(klUCB, self).__init__(nbArms, lower=lower, amplitude=amplitude) 35 | # self.c = c #: Parameter c 36 | # self.klucb = np.vectorize(klucb) #: kl function to use 37 | # self.klucb.__name__ = klucb.__name__ 38 | # self.tolerance = tolerance #: Numerical tolerance 39 | 40 | def __str__(self): 41 | name = self.klucb.__name__[5:] 42 | if name == "Bern": name = "" 43 | complement = "{}{}".format(name, "" if self.c == 3 else r"$c={:.3g}$".format(self.c)) 44 | if complement != "": complement = "({})".format(complement) 45 | return r"kl-UCB{}".format(complement) 46 | 47 | def computeIndex(self, arm): 48 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 49 | 50 | .. math:: 51 | 52 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 53 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{\log(t) + c \log(\max(1, \log(t)))}{N_k(t)} \right\},\\ 54 | I_k(t) &= U_k(t). 55 | 56 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 57 | and c is the parameter (default to 1). 58 | """ 59 | if self.pulls[arm] < 1: 60 | return float('+inf') 61 | else: 62 | # XXX We could adapt tolerance to the value of self.t 63 | return self.klucb(self.rewards[arm] / self.pulls[arm], (log(self.t) + self.c * log(max(1, log(self.t)))) / self.pulls[arm], self.tolerance) 64 | 65 | def computeAllIndex(self): 66 | """ Compute the current indexes for all arms, in a vectorized manner.""" 67 | indexes = self.klucb_vect(self.rewards / self.pulls, (np.log(self.t) + self.c * np.log(np.maximum(1., np.log(self.t)))) / self.pulls, self.tolerance) 68 | indexes[self.pulls < 1] = float('+inf') 69 | self.index[:] = indexes 70 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Basic setup.py to compile a Cython extension. 
4 | It is used to compile the ``kullback_cython`` extension, by running:: 5 | 6 | $ python setup.py build_ext --inplace 7 | 8 | You can also use [pyximport](http://docs.cython.org/en/latest/src/tutorial/cython_tutorial.html#pyximport-cython-compilation-for-developers) to import the ``kullback_cython`` module transparently: 9 | 10 | >>> import pyximport; pyximport.install() 11 | >>> import kullback_cython as kullback 12 | >>> # then use kullback.klucbBern or others, as if they came from the pure Python version! 13 | """ 14 | from distutils.core import setup 15 | from distutils.extension import Extension 16 | from Cython.Build import cythonize 17 | 18 | extensions = [ 19 | # Extension("kullback_cython", ["kullback_cython.pyx"]), 20 | # XXX also build the extension with full name? 21 | Extension("SMPyBandits.Policies.kullback_cython", ["kullback_cython.pyx"]), 22 | ] 23 | 24 | setup( 25 | ext_modules = cythonize(extensions, compiler_directives={ 26 | 'embedsignature': True, 27 | 'language_level': 3, 28 | 'warn.undeclared': True, 29 | 'warn.unreachable': True, 30 | 'warn.maybe_uninitialized': True, 31 | 'warn.unused': True, 32 | 'warn.unused_arg': True, 33 | 'warn.unused_result': True, 34 | 'warn.multiple_declarators': True, 35 | }) 36 | ) 37 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/usenumba.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Import numba.jit or a dummy decorator. 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.6" 8 | 9 | #: Configure the use of numba 10 | USE_NUMBA = False 11 | USE_NUMBA = True # XXX Experimental 12 | 13 | if not USE_NUMBA: 14 | print("Warning: numba.jit seems to be disabled. Using a dummy decorator for numba.jit() ...") # DEBUG 15 | 16 | # DONE I tried numba.jit() on these functions, and it DOES not give any speedup...:-( sad sad ! 17 | try: 18 | from numba.decorators import jit 19 | import locale # See this bug, http://numba.pydata.org/numba-doc/dev/user/faq.html#llvm-locale-bug 20 | locale.setlocale(locale.LC_NUMERIC, 'C') 21 | # print("Info: numba.jit seems to be available.") # DEBUG 22 | except ImportError: 23 | # print("Warning: numba.jit seems to not be available. Using a dummy decorator for numba.jit() ...\nIf you want the speed up brought by numba.jit, try to manually install numba and check that it works (installing llvmlite can be tricky, cf. https://github.com/numba/numba#custom-python-environments") # DEBUG 24 | USE_NUMBA = False 25 | 26 | if not USE_NUMBA: 27 | from functools import wraps 28 | 29 | def jit(f): 30 | """Fake numba.jit decorator.""" 31 | return f # XXX isn't it enough?! 32 | # @wraps(f) 33 | # def wrapper(*args, **kwargs): 34 | # """Fake docstring, shouldn't be used thanks to wraps.""" 35 | # return f(*args, **kwargs) 36 | # return wrapper 37 | 38 | 39 | # Only export and expose the useful functions defined here 40 | __all__ = ["USE_NUMBA", "jit"] 41 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/with_proba.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Simply defines a function :func:`with_proba` that is used everywhere. 
3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.9" 8 | 9 | from random import random 10 | 11 | 12 | # --- Utility functions 13 | 14 | 15 | def with_proba(epsilon): 16 | r"""Bernoulli test, with probability :math:`\varepsilon`, return `True`, and with probability :math:`1 - \varepsilon`, return `False`. 17 | 18 | Example: 19 | 20 | >>> from random import seed; seed(0) # reproductible 21 | >>> with_proba(0.5) 22 | False 23 | >>> with_proba(0.9) 24 | True 25 | >>> with_proba(0.1) 26 | False 27 | >>> if with_proba(0.2): 28 | ... print("This happens 20% of the time.") 29 | """ 30 | assert 0 <= epsilon <= 1, "Error: for 'with_proba(epsilon)', epsilon = {:.3g} has to be between 0 and 1 to be a valid probability.".format(epsilon) # DEBUG 31 | return random() < epsilon # True with proba epsilon 32 | 33 | 34 | # --- Debugging 35 | 36 | if __name__ == "__main__": 37 | # Code for debugging purposes. 38 | from doctest import testmod 39 | print("\nTesting automatically all the docstring written in each functions of this module :") 40 | testmod(verbose=True) 41 | -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/BaseCentralizedPolicy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Base class for any centralized policy, for the multi-players setting.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.6" 7 | 8 | 9 | class BaseCentralizedPolicy(object): 10 | """ Base class for any centralized policy, for the multi-players setting.""" 11 | 12 | def __init__(self, nbArms): 13 | """ New policy""" 14 | self.nbArms = nbArms 15 | 16 | def __str__(self): 17 | return self.__class__.__name__ 18 | 19 | def startGame(self): 20 | """ Start the simulation.""" 21 | raise NotImplementedError("This method startGame() has to be implemented in the child class inheriting from BaseCentralizedPolicy.") 22 | 23 | def getReward(self, arm, reward): 24 | """ Get a reward from that arm.""" 25 | raise NotImplementedError("This method getReward(arm, reward) has to be implemented in the child class inheriting from BaseCentralizedPolicy.") 26 | 27 | def choice(self): 28 | """ Choose an arm.""" 29 | raise NotImplementedError("This method choice() has to be implemented in the child class inheriting from BaseCentralizedPolicy.") 30 | -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/BaseMPPolicy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Base class for any multi-players policy. 3 | 4 | - If rewards are not in [0, 1], be sure to give the lower value and the amplitude. Eg, if rewards are in [-3, 3], lower = -3, amplitude = 6. 
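- In other words, ``lower`` and ``amplitude`` describe the affine rescaling :math:`r \mapsto (r - \mathrm{lower}) / \mathrm{amplitude}` used to map a raw reward back to :math:`[0, 1]`: with the example above, a reward of 3 is mapped to :math:`(3 - (-3)) / 6 = 1`.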
5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.3" 10 | 11 | 12 | class BaseMPPolicy(object): 13 | """ Base class for any multi-players policy.""" 14 | 15 | def __init__(self): 16 | """New policy""" 17 | pass 18 | 19 | def __str__(self): 20 | return self.__class__.__name__ 21 | 22 | # --- Proxy methods 23 | 24 | def _startGame_one(self, playerId): 25 | """Forward the call to self._players[playerId].""" 26 | return self._players[playerId].startGame() 27 | 28 | def _getReward_one(self, playerId, arm, reward): 29 | """Forward the call to self._players[playerId].""" 30 | return self._players[playerId].getReward(arm, reward) 31 | 32 | def _choice_one(self, playerId): 33 | """Forward the call to self._players[playerId].""" 34 | return self._players[playerId].choice() 35 | 36 | def _choiceWithRank_one(self, playerId, rank=1): 37 | """Forward the call to self._players[playerId].""" 38 | return self._players[playerId].choiceWithRank(rank) 39 | 40 | def _choiceFromSubSet_one(self, playerId, availableArms='all'): 41 | """Forward the call to self._players[playerId].""" 42 | return self._players[playerId].choiceFromSubSet(availableArms) 43 | 44 | def _choiceMultiple_one(self, playerId, nb=1): 45 | """Forward the call to self._players[playerId].""" 46 | return self._players[playerId].choiceMultiple(nb) 47 | 48 | def _choiceIMP_one(self, playerId, nb=1): 49 | """Forward the call to self._players[playerId].""" 50 | return self._players[playerId].choiceIMP(nb) 51 | 52 | def _estimatedOrder_one(self, playerId): 53 | """Forward the call to self._players[playerId].""" 54 | return self._players[playerId].estimatedOrder() 55 | 56 | def _estimatedBestArms_one(self, playerId, M=1): 57 | """Forward the call to self._players[playerId].""" 58 | return self._players[playerId].estimatedBestArms(M=M) 59 | -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/CentralizedIMP.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ CentralizedIMP: a multi-player policy where ONE policy is used by a centralized agent; asking the policy to select nbPlayers arms at each step, using an hybrid strategy: choose nb-1 arms with maximal empirical averages, then 1 arm with maximal index. Cf. algorithm IMP-TS [Komiyama, Honda, Nakagawa, 2016, arXiv 1506.00779]. 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.2" 8 | 9 | import numpy as np 10 | 11 | from .CentralizedMultiplePlay import CentralizedMultiplePlay 12 | 13 | 14 | # --- Class for the mother 15 | 16 | class CentralizedIMP(CentralizedMultiplePlay): 17 | """ CentralizedIMP: a multi-player policy where ONE policy is used by a centralized agent; asking the policy to select nbPlayers arms at each step, using an hybrid strategy: choose nb-1 arms with maximal empirical averages, then 1 arm with maximal index. Cf. algorithm IMP-TS [Komiyama, Honda, Nakagawa, 2016, arXiv 1506.00779]. 
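    A rough sketch of one IMP selection step (illustrative only, with hypothetical arrays ``means`` and ``indexes`` of shape ``(nbArms,)``)::

        import numpy as np
        best_by_mean = np.argsort(means)[-(nbPlayers - 1):]       # nbPlayers - 1 arms with the largest empirical means
        rest = [k for k in range(len(means)) if k not in best_by_mean]
        best_by_index = max(rest, key=lambda k: indexes[k])       # plus 1 arm with the largest index
        choices = list(best_by_mean) + [best_by_index]            # the nbPlayers arms played this round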
18 | """ 19 | 20 | def _choice_one(self, playerId): 21 | """Use `choiceIMP` for each player.""" 22 | if playerId == 0: # For the first player, run the method 23 | # FIXED sort it then apply affectation_order, to fix its order ==> will have a fixed nb of switches for CentralizedMultiplePlay 24 | if self.uniformAllocation: 25 | self.choices = self.player.choiceIMP(self.nbPlayers) 26 | else: 27 | self.choices = np.sort(self.player.choiceIMP(self.nbPlayers))[self.affectation_order] # XXX Increasing order... 28 | # self.choices = np.sort(self.player.choiceMultiple(self.nbPlayers))[self.affectation_order][::-1] # XXX Decreasing order... 29 | # print("At time t = {} the {} centralized policy chosed arms = {} ...".format(self.player.t, self, self.choices)) # DEBUG 30 | # For the all players, use the pre-computed result 31 | return self.choices[playerId] 32 | -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/README.md: -------------------------------------------------------------------------------- 1 | # [Multi-Player policies](https://smpybandits.github.io/docs/PoliciesMultiPlayers.html) 2 | > See here the documentation: [docs/PoliciesMultiPlayers](https://smpybandits.github.io/docs/PoliciesMultiPlayers.html) 3 | 4 | 5 | ## List of Policies 6 | `PoliciesMultiPlayers` folder : contains various collision-avoidance protocol for the multi-players setting. 7 | 8 | - [`Selfish`](Selfish.py): a multi-player policy where every player is selfish, they do not try to handle the collisions. 9 | 10 | - [`CentralizedNotFair`](CentralizedNotFair.py): a multi-player policy which uses a centralize intelligence to affect users to a FIXED arm. 11 | - [`CentralizedFair`](CentralizedFair.py): a multi-player policy which uses a centralize intelligence to affect users an offset, each one take an orthogonal arm based on (offset + t) % nbArms. 12 | 13 | - [`CentralizedMultiplePlay`](CentralizedMultiplePlay.py) and [`CentralizedIMP`](CentralizedIMP.py): multi-player policies that use centralized but non-omniscient learning to select K = nbPlayers arms at each time step. 14 | 15 | - [`OracleNotFair`](OracleNotFair.py): a multi-player policy with full knowledge and centralized intelligence to affect users to a FIXED arm, among the best arms. 16 | - [`OracleFair`](OracleFair.py): a multi-player policy which uses a centralized intelligence to affect users an offset, each one take an orthogonal arm based on (offset + t) % nbBestArms, among the best arms. 17 | 18 | - [`rhoRand`](rhoRand.py), [`ALOHA`](ALOHA.py): implementation of generic collision avoidance algorithms, relying on a single-player bandit policy (eg. [`UCB`](UCB.py), [`Thompson`](Thompson.py) etc). And variants, [`rhoRandRand`](rhoRandRand.py), [`rhoRandSticky`](rhoRandSticky.py), [`rhoRandRotating`](rhoRandRotating.py), [`rhoRandEst`](rhoRandEst.py), [`rhoLearn`](rhoLearn.py), [`rhoLearnEst`](rhoLearnEst.py), [`rhoLearnExp3`](rhoLearnExp3.py), [`rhoRandALOHA`](rhoRandALOHA.py), 19 | - [`rhoCentralized`](rhoCentralized.py) is a semi-centralized version where orthogonal ranks 1..M are given to the players, instead of just giving them the value of M, but a decentralized learning policy is still used to learn the best arms. 20 | - [`RandTopM`](RandTopM.py) is another approach, similar to [`rhoRandSticky`](rhoRandSticky.py) and [`MusicalChair`](MusicalChair.py), but we hope it will be better, and we succeed in analyzing more easily. 
21 | 22 | ## API 23 | All policies have the same interface, as described in [`BaseMPPolicy`](BaseMPPolicy.py) for decentralized policies, 24 | and [`BaseCentralizedPolicy`](BaseCentralizedPolicy.py) for centralized policies, 25 | in order to use them in any experiment with the following approach: 26 | 27 | ```python 28 | my_policy_MP = Policy_MP(nbPlayers, nbArms) 29 | children = my_policy_MP.children # get a list of usable single-player policies 30 | for one_policy in children: 31 | one_policy.startGame() # start the game 32 | for t in range(T): 33 | for i in range(nbPlayers): 34 | k_t[i] = children[i].choice() # chose one arm, for each player 35 | for k in range(nbArms): 36 | players_who_played_k = [ k_t[i] for i in range(nbPlayers) if k_t[i] == k ] 37 | reward = reward_t[k] = sampled from the arm k # sample a reward 38 | if len(players_who_played_k) > 1: 39 | reward = 0 40 | for i in players_who_played_k: 41 | children[i].getReward(k, reward) 42 | ``` -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/with_proba.py: -------------------------------------------------------------------------------- 1 | ../Policies/with_proba.py -------------------------------------------------------------------------------- /SMPyBandits/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Open-Source Python package for Single- and Multi-Players multi-armed Bandits algorithms. 5 | 6 | - Homepage: https://SMPyBandits.GitHub.io/ 7 | - Author: Lilian Besson and contributors 8 | - License: MIT 9 | - Date: October 2019 10 | """ 11 | from __future__ import division, print_function # Python 2 compatibility 12 | 13 | __author__ = "Lilian Besson" 14 | __version__ = "0.9.7" 15 | 16 | try: 17 | # from .Arms import * 18 | from SMPyBandits import Arms 19 | except ImportError: 20 | pass 21 | 22 | try: 23 | # from .Environment import * 24 | from SMPyBandits import Environment 25 | except ImportError: 26 | pass 27 | 28 | try: 29 | # from .Policies import * 30 | from SMPyBandits import Policies 31 | except ImportError: 32 | pass 33 | 34 | # try: 35 | # # from .Policies.Posterior import * 36 | # from SMPyBandits.Policies import Posterior 37 | # except ImportError: 38 | # pass 39 | 40 | try: 41 | # from .PoliciesMultiPlayers import * 42 | from SMPyBandits import PoliciesMultiPlayers 43 | except ImportError: 44 | pass 45 | -------------------------------------------------------------------------------- /SMPyBandits/example_of_main_singleplayer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | An example of a simple 'main' script. 5 | Main scripts load the config, run the simulations, and plot them, for the single player case. 
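For instance (the optional command-line argument selects which configuration module is loaded, see the import logic below)::

    $ python example_of_main_singleplayer.py                            # uses example_of_configuration_singleplayer
    $ python example_of_main_singleplayer.py very_simple_configuration  # uses very_simple_configuration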
6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | import sys 13 | if __name__ != '__main__': 14 | sys.exit(0) 15 | 16 | from Environment import Evaluator, notify 17 | 18 | if 'very_simple_configuration' in sys.argv or 'very_simple_configuration.py' in sys.argv: 19 | from very_simple_configuration import configuration 20 | else: 21 | from example_of_configuration_singleplayer import configuration 22 | 23 | configuration['showplot'] = True 24 | 25 | evaluation = Evaluator(configuration) 26 | 27 | # Start the evaluation and then print final ranking and plot, for each environment 28 | for envId, env in enumerate(evaluation.envs): 29 | # Evaluate just that env 30 | evaluation.startOneEnv(envId, env) 31 | 32 | # Compare them 33 | for envId, env in enumerate(evaluation.envs): 34 | evaluation.plotHistoryOfMeans(envId) # XXX To plot without saving 35 | 36 | print("\nGiving all the vector of final regrets ...") 37 | evaluation.printLastRegrets(envId) 38 | print("\nGiving the final ranking ...") 39 | evaluation.printFinalRanking(envId) 40 | 41 | print("\n\n- Plotting the last regrets...") 42 | evaluation.plotLastRegrets(envId, boxplot=True) 43 | 44 | print("\nGiving the mean and std running times ...") 45 | evaluation.printRunningTimes(envId) 46 | evaluation.plotRunningTimes(envId) 47 | 48 | print("\nGiving the mean and std running times ...") 49 | evaluation.printMemoryConsumption(envId) 50 | evaluation.plotMemoryConsumption(envId) 51 | 52 | print("\n\n- Plotting the mean reward...") 53 | evaluation.plotRegrets(envId, meanReward=True) 54 | 55 | print("\n\n- Plotting the regret...") 56 | evaluation.plotRegrets(envId) 57 | 58 | print("\n- Plotting the probability of picking the best arm of time...") 59 | evaluation.plotBestArmPulls(envId) 60 | 61 | print("\n- Plotting the histograms of regrets...") 62 | evaluation.plotLastRegrets(envId, sharex=True, sharey=True) 63 | 64 | # Done 65 | print("Done for simulations example_of_main_singleplayer ...") 66 | notify("Done for simulations example_of_main_singleplayer ...") 67 | -------------------------------------------------------------------------------- /SMPyBandits/include/README.md: -------------------------------------------------------------------------------- 1 | # Include 2 | - Just [docopt-cpp](https://github.com/docopt/docopt.cpp) -------------------------------------------------------------------------------- /SMPyBandits/include/docopt_util.h: -------------------------------------------------------------------------------- 1 | // 2 | // docopt_util.h 3 | // docopt 4 | // 5 | // Created by Jared Grubb on 2013-11-04. 6 | // Copyright (c) 2013 Jared Grubb. All rights reserved. 
7 | // 8 | 9 | #ifndef docopt_docopt_util_h 10 | #define docopt_docopt_util_h 11 | 12 | #if DOCTOPT_USE_BOOST_REGEX 13 | #include 14 | namespace std { 15 | using boost::regex; 16 | using boost::sregex_token_iterator; 17 | } 18 | #else 19 | #include 20 | #endif 21 | 22 | #pragma mark - 23 | #pragma mark General utility 24 | 25 | namespace { 26 | bool starts_with(std::string const& str, std::string const& prefix) 27 | { 28 | if (str.length() < prefix.length()) 29 | return false; 30 | return std::equal(prefix.begin(), prefix.end(), 31 | str.begin()); 32 | } 33 | 34 | std::string trim(std::string&& str, 35 | const std::string& whitespace = " \t\n") 36 | { 37 | const auto strEnd = str.find_last_not_of(whitespace); 38 | if (strEnd==std::string::npos) 39 | return {}; // no content 40 | str.erase(strEnd+1); 41 | 42 | const auto strBegin = str.find_first_not_of(whitespace); 43 | str.erase(0, strBegin); 44 | 45 | return std::move(str); 46 | } 47 | 48 | std::vector split(std::string const& str, size_t pos = 0) 49 | { 50 | const char* const anySpace = " \t\r\n\v\f"; 51 | 52 | std::vector ret; 53 | while (pos != std::string::npos) { 54 | auto start = str.find_first_not_of(anySpace, pos); 55 | if (start == std::string::npos) break; 56 | 57 | auto end = str.find_first_of(anySpace, start); 58 | auto size = end==std::string::npos ? end : end-start; 59 | ret.emplace_back(str.substr(start, size)); 60 | 61 | pos = end; 62 | } 63 | 64 | return ret; 65 | } 66 | 67 | std::tuple partition(std::string str, std::string const& point) 68 | { 69 | std::tuple ret; 70 | 71 | auto i = str.find(point); 72 | 73 | if (i == std::string::npos) { 74 | // no match: string goes in 0th spot only 75 | } else { 76 | std::get<2>(ret) = str.substr(i + point.size()); 77 | std::get<1>(ret) = point; 78 | str.resize(i); 79 | } 80 | std::get<0>(ret) = std::move(str); 81 | 82 | return ret; 83 | } 84 | 85 | template 86 | std::string join(I iter, I end, std::string const& delim) { 87 | if (iter==end) 88 | return {}; 89 | 90 | std::string ret = *iter; 91 | for(++iter; iter!=end; ++iter) { 92 | ret.append(delim); 93 | ret.append(*iter); 94 | } 95 | return ret; 96 | } 97 | 98 | std::vector regex_split(std::string const& text, std::regex const& re) 99 | { 100 | std::vector ret; 101 | for (auto it = std::sregex_token_iterator(text.begin(), text.end(), re, -1); 102 | it != std::sregex_token_iterator(); 103 | ++it) { 104 | ret.emplace_back(*it); 105 | } 106 | return ret; 107 | } 108 | } 109 | 110 | namespace docopt { 111 | template 112 | inline void hash_combine(std::size_t& seed, T const& v) 113 | { 114 | // stolen from boost::hash_combine 115 | std::hash hasher; 116 | seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); 117 | } 118 | } 119 | 120 | #endif 121 | -------------------------------------------------------------------------------- /SMPyBandits/very_simple_configuration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | An very simple configuration file to run some basic simulations about stationary multi-armed bandits. 
4 | """ 5 | 6 | from Arms import * 7 | 8 | from Environment import MAB 9 | 10 | from Policies import * 11 | 12 | # --- Parameters of the experiments 13 | HORIZON = 30 14 | 15 | REPETITIONS = 1 16 | 17 | NB_ARMS = 5 18 | 19 | ARM_TYPE = Bernoulli 20 | 21 | # Like http://localhost/publis/tiny-d3-bandit-animation.git/index.html?T=30&MU=0.1,0.2,0.3,0.4,0.9 22 | MEANS = [0.1, 0.2, 0.3, 0.4, 0.9] 23 | 24 | 25 | #: This dictionary configures the experiments 26 | configuration = { 27 | # --- Duration of the experiment 28 | "horizon": HORIZON, 29 | # --- Number of repetition of the experiment (to have an average) 30 | "repetitions": REPETITIONS, 31 | # --- Parameters for the use of joblib.Parallel 32 | "n_jobs": 1, # = nb of CPU cores 33 | "verbosity": 6, # Max joblib verbosity 34 | # --- Other parameters for the Evaluator 35 | "finalRanksOnAverage": True, # Use an average instead of the last value for the final ranking of the tested players 36 | "averageOn": 1e-3, # Average the final rank on the 1.% last time steps 37 | # --- Should we plot the lower-bounds or not? 38 | "plot_lowerbounds": False, # XXX Default 39 | # --- Arms 40 | "environment": [ 41 | { # Use vector from command line 42 | "arm_type": ARM_TYPE, 43 | "params": MEANS 44 | }, 45 | ], 46 | } 47 | 48 | configuration.update({ 49 | "policies": [ 50 | # --- Full or partial knowledge algorithms 51 | { "archtype": TakeFixedArm, "params": { "armIndex": 0 }}, # Take worse arm! 52 | { "archtype": TakeFixedArm, "params": { "armIndex": 1 }}, # Take second worse arm! 53 | { "archtype": TakeFixedArm, "params": { "armIndex": 2 }}, # Take third worse arm! 54 | { "archtype": TakeFixedArm, "params": { "armIndex": 3 }}, # Take forth worse arm! 55 | { "archtype": TakeFixedArm, "params": { "armIndex": 4 }}, # Take fifth worse arm! 
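        # NB: since MEANS = [0.1, 0.2, 0.3, 0.4, 0.9] is sorted in increasing order, armIndex=4 is in fact the best arm, so this last baseline plays the optimal arm at every step.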
56 | # --- Stupid algorithms 57 | { 58 | "archtype": Uniform, # The stupidest policy, fully uniform 59 | "params": {} 60 | }, 61 | # --- UCB algorithm 62 | { 63 | "archtype": UCB, # UCB with alpha=1 parameter 64 | "params": {} 65 | }, 66 | # --- Thompson algorithm 67 | { 68 | "archtype": Thompson, 69 | "params": {} 70 | }, 71 | # --- KL UCB algorithm 72 | { 73 | "archtype": klUCB, 74 | "params": {} 75 | }, 76 | # --- BESA algorithm 77 | { 78 | "archtype": BESA, 79 | "params": { 80 | "horizon": HORIZON, 81 | } 82 | }, 83 | # --- MOSS algorithm 84 | { 85 | "archtype": MOSS, 86 | "params": {} 87 | }, 88 | # --- Exp3++ algorithm 89 | { 90 | "archtype": Exp3PlusPlus, 91 | "params": {} 92 | }, 93 | ]} 94 | ) 95 | 96 | # DONE 97 | print("Loaded experiments configuration from 'example_of_configuration_singleplayer.py' :") 98 | print("configuration =", configuration) # DEBUG 99 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | 4 | ARG PYTHON_VERSION=3.9 5 | 6 | 7 | # Install necessary building tools and dependencies 8 | RUN apt-get update && apt-get install -y --no-install-recommends \ 9 | build-essential \ 10 | git \ 11 | curl \ 12 | sudo \ 13 | bzip2 \ 14 | libx11-6 \ 15 | ca-certificates \ 16 | libjpeg-dev \ 17 | libpng-dev && \ 18 | rm -rf /var/lib/apt/lists/* 19 | 20 | RUN apt-get update && apt-get -y dist-upgrade && apt-get purge -y libboost-all-dev && \ 21 | apt-get install -f -y libboost-all-dev && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | 25 | # Create a working directory 26 | RUN mkdir /app 27 | WORKDIR /app 28 | 29 | 30 | # Install conda 31 | RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ 32 | chmod +x ~/miniconda.sh && \ 33 | ~/miniconda.sh -b -p /opt/conda && \ 34 | rm ~/miniconda.sh && \ 35 | /opt/conda/bin/conda install -y python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl mkl-include ninja cython typing && \ 36 | /opt/conda/bin/conda clean -ya 37 | 38 | 39 | ENV PATH /opt/conda/bin:$PATH 40 | 41 | RUN apt-get update 42 | RUN apt-get install -y vim lshw 43 | RUN apt-get install -y ghostscript 44 | RUN apt-get install -y poppler-utils 45 | 46 | ENV LANG C.UTF-8 47 | 48 | RUN export CC=mpicc && export HDF5_MPI="ON" && /opt/conda/bin/pip install versioned-hdf5 49 | 50 | # Install data science and machine learning packages using conda 51 | RUN /opt/conda/bin/conda install -y -c conda-forge joblib numba tqdm && \ 52 | /opt/conda/bin/conda install -y -c conda-forge sphinx_rtd_theme recommonmark nbsphinx ipywidgets && \ 53 | /opt/conda/bin/conda clean -ya 54 | 55 | 56 | # Install data science and machine learning packages using conda 57 | RUN /opt/conda/bin/conda install -y -c conda-forge scikit-learn scikit-optimize pandas seaborn && \ 58 | /opt/conda/bin/conda clean -ya 59 | 60 | RUN /opt/conda/bin/conda install -y -c conda-forge matplotlib">=3.4.3" && \ 61 | /opt/conda/bin/conda install -c anaconda python-dateutil && \ 62 | /opt/conda/bin/conda clean -ya 63 | 64 | RUN /opt/conda/bin/conda install -y -c anaconda ujson && \ 65 | /opt/conda/bin/conda clean -ya 66 | 67 | 68 | # Install Jupyter 69 | RUN /opt/conda/bin/conda install -y -c conda-forge jupyterlab jupyter_http_over_ws nodejs">=16.0.0" && \ 70 | /opt/conda/bin/conda clean -ya 71 | 72 | RUN /opt/conda/bin/conda install -y -c anaconda nbformat && \ 73 | /opt/conda/bin/conda install -y ipykernel && \ 74 | 
/opt/conda/bin/conda clean -ya 75 | RUN jupyter serverextension enable --py jupyter_http_over_ws 76 | 77 | RUN python3 -m ipykernel.kernelspec 78 | -------------------------------------------------------------------------------- /strategic_scripts/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from pathlib import Path 4 | 5 | from run_experiment import runner 6 | 7 | 8 | if __name__ == "__main__": 9 | # python3 main.py --setup=default.json > log/default.log 2>&1 & 10 | # python3 main.py --setup=05X50.json > log/05X50.log 2>&1 & 11 | # ps | grep python3 main.py 12 | parser = argparse.ArgumentParser(description="Get json file containing experiment setup") 13 | parser.add_argument("--setup", type=str) 14 | parser.add_argument("--policy", type=str) 15 | parser.add_argument("--L", type=int) 16 | args = parser.parse_args() 17 | 18 | setup_file = str(args.setup) 19 | policy_str = str(args.policy) 20 | L = int(args.L) if args.L else None 21 | assert ".json" in setup_file, "setup file name should contain '.json'" 22 | 23 | AVALIABLE_POLICY = [ 24 | 'UCB', 25 | 'H_UCB', 26 | 'RH_UCB', 27 | 'Sampled_R_UCB', 28 | ] 29 | assert policy_str in AVALIABLE_POLICY, "policy is not available" 30 | 31 | with open(setup_file) as json_file: 32 | data = json.load(json_file) 33 | 34 | experiment_name = str(Path(setup_file).stem) 35 | 36 | horizon = data["horizon"] 37 | repetitions = data["repetitions"] 38 | n_jobs = data["n_jobs"] 39 | verbosity = data["verbosity"] 40 | 41 | arm_type = data["arm_type"] 42 | agent_arm_dict = data["agent_arm_dict"] 43 | 44 | save_json = data["save_json"] 45 | save_h5py = data["save_h5py"] 46 | save_pickle = data["save_pickle"] 47 | 48 | runner( 49 | experiment_name, policy_str, 50 | horizon, repetitions, n_jobs, verbosity, 51 | arm_type, agent_arm_dict, L, 52 | save_json, save_h5py, save_pickle 53 | ) 54 | -------------------------------------------------------------------------------- /strategic_scripts/run_h_ucb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # currentShellPID=$(echo $$) 4 | # echo $currentShellPID && taskset -cp 25-49 $currentShellPID && echo $currentShellPID 5 | 6 | 7 | declare -a setupnames=( 8 | "N100_05X100" 9 | ) 10 | 11 | for i in "${setupnames[@]}" 12 | do 13 | python3 main.py --setup=setups/${i}.json --policy=H_UCB > log/${i}_H_UCB.log 2>&1 14 | done 15 | 16 | # python3 main.py --setup=setups/N100_05X100.json --policy=UCB -------------------------------------------------------------------------------- /strategic_scripts/run_rh_ucb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # currentShellPID=$(echo $$) 4 | # echo $currentShellPID && taskset -cp 50-62 $currentShellPID && echo $currentShellPID 5 | 6 | 7 | declare -a setupnames=( 8 | "N100_05X100" 9 | ) 10 | 11 | for i in "${setupnames[@]}" 12 | do 13 | python3 main.py --setup=setups/${i}.json --policy=RH_UCB > log/${i}_RH_UCB.log 2>&1 14 | done 15 | -------------------------------------------------------------------------------- /strategic_scripts/run_sampled_r_ucb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # currentShellPID=$(echo $$) 4 | # echo $currentShellPID && taskset -cp 63-75 $currentShellPID && echo $currentShellPID 5 | 6 | 7 | declare -a basicsetupnames=( 8 | "N100_05X100" 9 | ) 10 | 11 | for i in "${basicsetupnames[@]}" 12 | do 
13 | python3 main.py --setup=setups/${i}.json --policy=Sampled_R_UCB --L=5 > log/${i}_Sampled_R_UCB_L5.log 2>&1 14 | done 15 | -------------------------------------------------------------------------------- /strategic_scripts/run_ucb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # currentShellPID=$(echo $$) 4 | # echo $currentShellPID && taskset -cp 0-24 $currentShellPID && echo $currentShellPID 5 | 6 | 7 | declare -a setupnames=( 8 | "N100_05X100" 9 | ) 10 | 11 | for i in "${setupnames[@]}" 12 | do 13 | python3 main.py --setup=setups/${i}.json --policy=UCB > log/${i}_UCB.log 2>&1 14 | done 15 | 16 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X100.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [100]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X200.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [200]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X300.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [300]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X400.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [400]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X500.json: -------------------------------------------------------------------------------- 
1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [500]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X100.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [100]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X200.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [200]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X300.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [300]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X400.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [400]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X500.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [500]}, 10 | "1": 
{"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_rh_ucb_best_10_100_replicate1000X3.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9, 0.2, 0.1], "repeat": [10, 100, 100]}, 10 | "1": {"param": [0.8, 0.2, 0.1], "repeat": [10, 100, 100]}, 11 | "2": {"param": [0.7, 0.2, 0.1], "repeat": [1000, 1000, 1000]}, 12 | "3": {"param": [0.6, 0.2, 0.1], "repeat": [1000, 1000, 1000]}, 13 | "4": {"param": [0.5, 0.2, 0.1], "repeat": [1000, 1000, 1000]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_single_origin_arm1000X1.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1000]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_single_origin_arm1000X4.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1000]}, 11 | "2": {"param": [0.7], "repeat": [1000]}, 12 | "3": {"param": [0.6], "repeat": [1000]}, 13 | "4": {"param": [0.5], "repeat": [1000]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | --------------------------------------------------------------------------------