├── .gitignore ├── README.md ├── SMPyBandits ├── Arms │ ├── Arm.py │ ├── Bernoulli.py │ ├── Binomial.py │ ├── Constant.py │ ├── DiscreteArm.py │ ├── Exponential.py │ ├── Gamma.py │ ├── Gaussian.py │ ├── Poisson.py │ ├── README.md │ ├── RestedRottingArm.py │ ├── RestlessArm.py │ ├── UniformArm.py │ ├── __init__.py │ ├── kullback.py │ └── usenumba.py ├── C_Interface │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── Sample.py │ ├── test.c │ ├── test.cpp │ ├── test.py │ ├── test2.cpp │ └── test_sub.cpp ├── Environment │ ├── CollisionModels.py │ ├── Evaluator.py │ ├── EvaluatorMultiPlayers.py │ ├── EvaluatorSparseMultiPlayers.py │ ├── MAB.py │ ├── MAB_rotting.py │ ├── README.md │ ├── Result.py │ ├── ResultMultiPlayers.py │ ├── StrategicEvaluator.py │ ├── StrategicResult.py │ ├── __init__.py │ ├── fairnessMeasures.py │ ├── memory_consumption.py │ ├── notify.py │ ├── plot_Cmu_HOI.py │ ├── plotsettings.py │ ├── pykov.py │ ├── sortedDistance.py │ ├── usejoblib.py │ ├── usenumba.py │ └── usetqdm.py ├── Experiment │ ├── Seznec19a_Fig1 │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ └── style.mplstyle │ ├── Seznec19a_Fig2 │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ └── style.mplstyle │ ├── Seznec19a_Fig3 │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ └── style.mplstyle │ ├── Seznec19b_Fig1 │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ ├── prepare_yahoo_data.py │ │ └── style.mplstyle │ ├── Seznec_EFF │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ ├── plot_delay.py │ │ └── style.mplstyle │ └── Seznec_asymptotic │ │ ├── .gitignore │ │ ├── main.py │ │ ├── plot.py │ │ └── style.mplstyle ├── LICENSE ├── Policies │ ├── .gitignore │ ├── AdBandits.py │ ├── AdSwitch.py │ ├── AdSwitchNew.py │ ├── Aggregator.py │ ├── ApproximatedFHGittins.py │ ├── BESA.py │ ├── BasePolicy.py │ ├── BaseWrapperPolicy.py │ ├── BayesUCB.py │ ├── BayesianIndexPolicy.py │ ├── BoltzmannGumbel.py │ ├── C │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── kullback_py3.c │ │ ├── setup.py │ │ └── setup.py3 │ ├── CD_UCB.py │ ├── CORRAL.py │ ├── CPUCB.py │ ├── CUSUM_UCB.py │ ├── DMED.py │ ├── DiscountedBayesianIndexPolicy.py │ ├── DiscountedThompson.py │ ├── DiscountedUCB.py │ ├── DoublingTrickWrapper.py │ ├── EmpiricalMeans.py │ ├── EpsilonGreedy.py │ ├── Exp3.py │ ├── Exp3PlusPlus.py │ ├── Exp3R.py │ ├── Exp3S.py │ ├── Experimentals │ │ ├── .gitignore │ │ ├── BlackBoxOpt.py │ │ ├── KLempUCB.py │ │ ├── Makefile │ │ ├── README.md │ │ ├── ThompsonRobust.py │ │ ├── UCBcython.pyx │ │ ├── UCBjulia.jl │ │ ├── UCBjulia.py │ │ ├── UCBlog10.py │ │ ├── UCBlog10alpha.py │ │ ├── UCBoost_cython.pyx │ │ ├── UCBoost_faster.py │ │ ├── UCBoost_faster_cython.pyx │ │ ├── UCBwrong.py │ │ ├── UnsupervisedLearning.py │ │ ├── __init__.py │ │ ├── klUCBlog10.py │ │ ├── klUCBloglog10.py │ │ └── setup.py │ ├── ExploreThenCommit.py │ ├── FEWA.py │ ├── GLR_UCB.py │ ├── GenericAggregation.py │ ├── GreedyOracle.py │ ├── H_UCB.py │ ├── Hedge.py │ ├── IMED.py │ ├── IndexPolicy.py │ ├── LM_DSEE.py │ ├── LearnExp.py │ ├── MEGA.py │ ├── MOSS.py │ ├── MOSSAnytime.py │ ├── MOSSExperimental.py │ ├── MOSSH.py │ ├── Makefile │ ├── Monitored_UCB.py │ ├── MusicalChair.py │ ├── MusicalChairNoSensing.py │ ├── OCUCB.py │ ├── OCUCBH.py │ ├── OSSB.py │ ├── OracleSequentiallyRestartPolicy.py │ ├── PHE.py │ ├── PRH_UCB.py │ ├── Posterior │ │ ├── Beta.py │ │ ├── DiscountedBeta.py │ │ ├── Gamma.py │ │ ├── Gauss.py │ │ ├── Posterior.py │ │ ├── README.md │ │ ├── __init__.py │ │ └── with_proba.py │ ├── ProbabilityPursuit.py │ ├── 
RAWUCB.py │ ├── RCB.py │ ├── README.md │ ├── RH_UCB.py │ ├── RH_UCB_Temp.py │ ├── RandomizedIndexPolicy.py │ ├── SIC_MMAB.py │ ├── SWA.py │ ├── SWHash_UCB.py │ ├── Sampled_R_UCB.py │ ├── SlidingWindowRestart.py │ ├── SlidingWindowUCB.py │ ├── Softmax.py │ ├── SparseUCB.py │ ├── SparseWrapper.py │ ├── SparseklUCB.py │ ├── StrategicBasePolicy.py │ ├── StrategicIndexPolicy.py │ ├── StrategicUCB2PhaseRobustDeprecated.py │ ├── SuccessiveElimination.py │ ├── TakeFixedArm.py │ ├── TakeRandomFixedArm.py │ ├── Thompson.py │ ├── TrekkingTSN.py │ ├── TsallisInf.py │ ├── UCB.py │ ├── UCBH.py │ ├── UCBV.py │ ├── UCBVtuned.py │ ├── UCBalpha.py │ ├── UCBdagger.py │ ├── UCBimproved.py │ ├── UCBmin.py │ ├── UCBoost.py │ ├── UCBplus.py │ ├── UCBrandomInit.py │ ├── Uniform.py │ ├── UniformOnSome.py │ ├── WrapRange.py │ ├── __init__.py │ ├── _test_for_BESA_core_function.py │ ├── klUCB.py │ ├── klUCBH.py │ ├── klUCBHPlus.py │ ├── klUCBPlus.py │ ├── klUCBPlusPlus.py │ ├── klUCB_forGLR.py │ ├── klUCBloglog.py │ ├── klUCBloglog_forGLR.py │ ├── klUCBswitch.py │ ├── kullback.py │ ├── kullback.pydoctest.txt │ ├── kullback_cython.pyx │ ├── setup.py │ ├── usenumba.py │ └── with_proba.py ├── PoliciesMultiPlayers │ ├── ALOHA.py │ ├── BaseCentralizedPolicy.py │ ├── BaseMPPolicy.py │ ├── CentralizedCycling.py │ ├── CentralizedFixed.py │ ├── CentralizedIMP.py │ ├── CentralizedMultiplePlay.py │ ├── ChildPointer.py │ ├── DepRound.py │ ├── EstimateM.py │ ├── OracleFair.py │ ├── OracleNotFair.py │ ├── README.md │ ├── RandTopM.py │ ├── RandTopMEst.py │ ├── Scenario1.py │ ├── Selfish.py │ ├── __init__.py │ ├── rhoCentralized.py │ ├── rhoEst.py │ ├── rhoLearn.py │ ├── rhoLearnEst.py │ ├── rhoLearnExp3.py │ ├── rhoRand.py │ ├── rhoRandALOHA.py │ ├── rhoRandRand.py │ ├── rhoRandRotating.py │ ├── rhoRandSticky.py │ └── with_proba.py ├── README.rst ├── __init__.py ├── complete_tree_exploration_for_MP_bandits.py ├── configuration.py ├── configuration_all_singleplayer.py ├── configuration_comparing_aggregation_algorithms.py ├── configuration_comparing_doubling_algorithms.py ├── configuration_markovian.py ├── configuration_multiplayers.py ├── configuration_multiplayers_nonstationary.py ├── configuration_multiplayers_with_aggregation.py ├── configuration_nonstationary.py ├── configuration_sparse.py ├── configuration_sparse_multiplayers.py ├── env_client.cpp ├── env_client.py ├── example_of_configuration_multiplayers.py ├── example_of_configuration_singleplayer.py ├── example_of_main_multiplayers_more.py ├── example_of_main_singleplayer.py ├── include │ ├── README.md │ ├── docopt.cpp │ ├── docopt.h │ ├── docopt_private.h │ ├── docopt_util.h │ ├── docopt_value.h │ └── subprocess.hpp ├── main.py ├── main_multiplayers.py ├── main_multiplayers_more.py ├── main_sparse_multiplayers.py ├── policy_server.py ├── save_configuration_for_reproducibility.py └── very_simple_configuration.py ├── docker └── Dockerfile └── strategic_scripts ├── draw_fig.py ├── help_experiment.py ├── main.py ├── run_experiment.py ├── run_h_ucb.sh ├── run_rh_ucb.sh ├── run_sampled_r_ucb.sh ├── run_ucb.sh └── setups ├── N100_05X100.json ├── N100_05X200.json ├── N100_05X300.json ├── N100_05X400.json ├── N100_05X500.json ├── N100_09X100.json ├── N100_09X200.json ├── N100_09X300.json ├── N100_09X400.json ├── N100_09X500.json ├── N100_default.json ├── N100_rh_ucb_best_10_100_replicate1000X3.json ├── N100_single_origin_arm1000X1.json └── N100_single_origin_arm1000X4.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 
cache 2 | __pycache__/ 3 | 4 | # Visual Studio Code configuration 5 | .vscode/ 6 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/Arm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Base class for an arm class.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.6" 7 | 8 | 9 | class Arm(object): 10 | """ Base class for an arm class.""" 11 | 12 | def __init__(self, lower=0., amplitude=1.): 13 | """ Base class for an arm class.""" 14 | self.lower = lower #: Lower value of rewards 15 | self.amplitude = amplitude #: Amplitude of value of rewards 16 | self.min = lower #: Lower value of rewards 17 | self.max = lower + amplitude #: Higher value of rewards 18 | 19 | # --- Printing 20 | 21 | # This decorator @property makes this method an attribute, cf. https://docs.python.org/3/library/functions.html#property 22 | @property 23 | def lower_amplitude(self): 24 | """(lower, amplitude)""" 25 | if hasattr(self, 'lower') and hasattr(self, 'amplitude'): 26 | return self.lower, self.amplitude 27 | elif hasattr(self, 'min') and hasattr(self, 'max'): 28 | return self.min, self.max - self.min 29 | else: 30 | raise NotImplementedError("This method lower_amplitude() has to be implemented in the class inheriting from Arm.") 31 | 32 | # --- Printing 33 | 34 | def __str__(self): 35 | return self.__class__.__name__ 36 | 37 | def __repr__(self): 38 | return "{}({})".format(self.__class__.__name__, self.__dir__) 39 | 40 | # --- Random samples 41 | 42 | def draw(self, t=None): 43 | """ Draw one random sample.""" 44 | raise NotImplementedError("This method draw(t) has to be implemented in the class inheriting from Arm.") 45 | 46 | def oracle_draw(self, t=None): 47 | """ Draw the arm as usual (to update its internal state), but return its mean instead of the sampled reward.""" 48 | assert hasattr(self, "mean"), "oracle_draw() can only be used on an Arm that has a mean attribute." 49 | mean = self.mean 50 | self.draw(t) 51 | return mean 52 | 53 | def set_mean_param(self, mean): 54 | raise NotImplementedError("This method set_mean_param(mean) has to be implemented in the class inheriting from Arm.") 55 | 56 | 57 | 58 | def draw_nparray(self, shape=(1,)): 59 | """ Draw a numpy array of random samples, of a certain shape.""" 60 | raise NotImplementedError("This method draw_nparray(shape) has to be implemented in the class inheriting from Arm.") 61 | 62 | # --- Lower bound 63 | 64 | @staticmethod 65 | def kl(x, y): 66 | """ The kl(x, y) to use for this arm.""" 67 | raise NotImplementedError("This method kl(x, y) has to be implemented in the class inheriting from Arm.") 68 | 69 | @staticmethod 70 | def oneLR(mumax, mu): 71 | """ One term of the Lai & Robbins lower bound for this arm: (mumax - mu) / KL(mu, mumax). """ 72 | raise NotImplementedError("This method oneLR(mumax, mu) has to be implemented in the class inheriting from Arm.") 73 | 74 | @staticmethod 75 | def oneHOI(mumax, mu): 76 | """ One term for the HOI factor for this arm.""" 77 | return 1 - (mumax - mu) 78 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/Bernoulli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Bernoulli distributed arm. 
3 | 4 | Example of creating an arm: 5 | 6 | >>> import random; import numpy as np 7 | >>> random.seed(0); np.random.seed(0) 8 | >>> B03 = Bernoulli(0.3) 9 | >>> B03 10 | B(0.3) 11 | >>> B03.mean 12 | 0.3 13 | 14 | Examples of sampling from an arm: 15 | 16 | >>> B03.draw() 17 | 0 18 | >>> B03.draw_nparray(20) 19 | array([1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 20 | 1., 1., 1.]) 21 | """ 22 | from __future__ import division, print_function # Python 2 compatibility 23 | 24 | __author__ = "Lilian Besson" 25 | __version__ = "0.6" 26 | 27 | import numpy as np 28 | from numpy.random import binomial 29 | 30 | # Local imports 31 | try: 32 | from .Arm import Arm 33 | from .kullback import klBern 34 | except ImportError: 35 | from Arm import Arm 36 | from kullback import klBern 37 | 38 | 39 | class Bernoulli(Arm): 40 | """ Bernoulli distributed arm.""" 41 | 42 | def __init__(self, probability): 43 | """New arm.""" 44 | assert 0 <= probability <= 1, "Error, the parameter probability for Bernoulli class has to be in [0, 1]." # DEBUG 45 | self.probability = probability #: Parameter p for this Bernoulli arm 46 | self.mean = probability #: Mean for this Bernoulli arm 47 | 48 | # --- Random samples 49 | 50 | def draw(self, t=None): 51 | """ Draw one random sample.""" 52 | return binomial(1, self.probability) 53 | # return np.asarray(binomial(1, self.probability), dtype=float) 54 | 55 | def draw_nparray(self, shape=(1,)): 56 | """ Draw a numpy array of random samples, of a certain shape.""" 57 | return np.asarray(binomial(1, self.probability, shape), dtype=float) 58 | 59 | def set_mean_param(self, probability): 60 | self.probability = self.mean = probability 61 | 62 | # --- Printing 63 | 64 | # This decorator @property makes this method an attribute, cf. https://docs.python.org/3/library/functions.html#property 65 | @property 66 | def lower_amplitude(self): 67 | """(lower, amplitude)""" 68 | return 0., 1. 69 | 70 | def __str__(self): 71 | return "Bernoulli" 72 | 73 | def __repr__(self): 74 | return "B({:.3g})".format(self.probability) 75 | 76 | # --- Lower bound 77 | 78 | @staticmethod 79 | def kl(x, y): 80 | """ The kl(x, y) to use for this arm.""" 81 | return klBern(x, y) 82 | 83 | @staticmethod 84 | def oneLR(mumax, mu): 85 | """ One term of the Lai & Robbins lower bound for Bernoulli arms: (mumax - mu) / KL(mu, mumax). """ 86 | return (mumax - mu) / klBern(mu, mumax) 87 | 88 | 89 | # Only export and expose the class defined here 90 | __all__ = ["Bernoulli"] 91 | 92 | 93 | # --- Debugging 94 | 95 | if __name__ == "__main__": 96 | # Code for debugging purposes. 97 | from doctest import testmod 98 | print("\nTesting automatically all the docstring written in each functions of this module :") 99 | testmod(verbose=True) 100 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/Constant.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Arm with a constant reward. Useful for debugging. 
3 | 4 | Example of creating an arm: 5 | 6 | >>> C013 = Constant(0.13) 7 | >>> C013 8 | Constant(0.13) 9 | >>> C013.mean 10 | 0.13 11 | 12 | Examples of sampling from an arm: 13 | 14 | >>> C013.draw() 15 | 0.13 16 | >>> C013.draw_nparray(3) 17 | array([0.13, 0.13, 0.13]) 18 | """ 19 | from __future__ import division, print_function # Python 2 compatibility 20 | 21 | __author__ = "Lilian Besson" 22 | __version__ = "0.6" 23 | 24 | import numpy as np 25 | 26 | # Local imports 27 | try: 28 | from .Arm import Arm 29 | except ImportError: 30 | from Arm import Arm 31 | 32 | 33 | class Constant(Arm): 34 | """ Arm with a constant reward. Useful for debugging. 35 | 36 | - `constant_reward` is the constant reward, 37 | - `lower`, `amplitude` default to `floor(constant_reward)`, `1` (so the ) 38 | 39 | >>> arm_0_5 = Constant(0.5) 40 | >>> arm_0_5.draw() 41 | 0.5 42 | >>> arm_0_5.draw_nparray((3, 2)) 43 | array([[0.5, 0.5], 44 | [0.5, 0.5], 45 | [0.5, 0.5]]) 46 | """ 47 | 48 | def __init__(self, constant_reward=0.5, lower=0., amplitude=1.): 49 | """ New arm.""" 50 | constant_reward = float(constant_reward) 51 | self.constant_reward = constant_reward #: Constant value of rewards 52 | lower = min(lower, np.floor(constant_reward)) 53 | self.lower = lower #: Known lower value of rewards 54 | self.amplitude = amplitude #: Known amplitude of rewards 55 | self.mean = constant_reward #: Mean for this Constant arm 56 | 57 | # --- Random samples 58 | 59 | def draw(self, t=None): 60 | """ Draw one constant sample. The parameter t is ignored in this Arm.""" 61 | return self.constant_reward 62 | 63 | def draw_nparray(self, shape=(1,)): 64 | """ Draw a numpy array of constant samples, of a certain shape.""" 65 | return np.full(shape, self.constant_reward) 66 | 67 | def set_mean_param(self, mean): 68 | self.mean = mean 69 | 70 | # --- Printing 71 | 72 | def __str__(self): 73 | return "Constant" 74 | 75 | def __repr__(self): 76 | return "Constant({:.3g})".format(self.constant_reward) 77 | 78 | # --- Lower bound 79 | 80 | @staticmethod 81 | def kl(x, y): 82 | """ The `kl(x, y) = abs(x - y)` to use for this arm.""" 83 | return abs(x - y) 84 | 85 | @staticmethod 86 | def oneLR(mumax, mu): 87 | """ One term of the Lai & Robbins lower bound for Constant arms: (mumax - mu) / KL(mu, mumax). """ 88 | return (mumax - mu) / abs(mumax - mu) 89 | 90 | 91 | __all__ = ["Constant"] 92 | 93 | 94 | # --- Debugging 95 | 96 | if __name__ == "__main__": 97 | # Code for debugging purposes. 98 | from doctest import testmod 99 | print("\nTesting automatically all the docstring written in each functions of this module :") 100 | testmod(verbose=True) 101 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/README.md: -------------------------------------------------------------------------------- 1 | # [Arms](https://smpybandits.github.io/docs/Arms.html) 2 | > See here the documentation: [docs/Arms](https://smpybandits.github.io/docs/Arms.html) 3 | 4 | Arms : contains different types of bandit arms: 5 | [`Constant`](Constant.py), [`UniformArm`](UniformArm.py), [`Bernoulli`](Bernoulli.py), [`Binomial`](Binomial.py), [`Poisson`](Poisson.py), [`Gaussian`](Gaussian.py), [`Exponential`](Exponential.py), [`Gamma`](Gamma.py). 
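Adding a new arm type only requires implementing the small interface defined in [`Arm.py`](Arm.py) (a `mean` attribute, `draw()` and `draw_nparray()`, and optionally `kl`, `oneLR` and `set_mean_param()`). A minimal sketch of a custom arm (purely illustrative, not part of the package; it assumes the `Arm` base class is exposed at the package level like the other arm classes):

```python
import numpy as np
from SMPyBandits.Arms import Arm  # abstract base class defining the common interface

class Triangular(Arm):
    """ Illustrative arm: rewards drawn from a triangular distribution on [0, 1]."""

    def __init__(self, mode=0.5):
        super(Triangular, self).__init__(lower=0., amplitude=1.)
        self.mode = mode  #: Mode of the triangular distribution
        self.mean = (0. + mode + 1.) / 3.  #: Mean of a triangular distribution on [0, 1]

    def draw(self, t=None):
        """ Draw one random sample (the parameter t is ignored)."""
        return np.random.triangular(0., self.mode, 1.)

    def draw_nparray(self, shape=(1,)):
        """ Draw a numpy array of random samples, of a certain shape."""
        return np.random.triangular(0., self.mode, 1., size=shape)
```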
6 | 7 | Each arm class follows the same interface: 8 | 9 | ```python 10 | >>> my_arm = Arm(params) 11 | >>> my_arm.mean 12 | 0.5 13 | >>> my_arm.draw() # one random draw 14 | 0.0 15 | >>> my_arm.draw_nparray(20) # or ((3, 10)), many draws 16 | array([ 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 17 | 1., 0., 0., 0., 1., 1., 1.]) 18 | ``` 19 | 20 | 21 | Also the [`__init__.py`](__init__.py) file contains: 22 | 23 | - `uniformMeans`, to generate uniformly spaced means of arms. 24 | - `uniformMeansWithSparsity`, to generate uniformly spaced means of arms, with sparsity constraints. 25 | - `randomMeans`, to generate randomly spaced means of arms. 26 | - `randomMeansWithGapBetweenMbestMworst`, to generate randomly spaced means of arms, with a constraint on the gap between the M-best arms and the (K-M)-worst arms. 27 | - `randomMeans`, to generate randomly spaced means of arms. 28 | - `shuffled`, to return a shuffled version of a list. 29 | - Utility functions `array_from_str`, `list_from_str` and `tuple_from_str` to obtain a `numpy.ndarray`, a `list` or a `tuple` from a string (used for the CLI env variables interface). 30 | - `optimal_selection_probabilities`. -------------------------------------------------------------------------------- /SMPyBandits/Arms/RestedRottingArm.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien Seznec 3 | Rested rotting arm, i.e. an arm whose mean value decays at each pull 4 | """ 5 | 6 | try: 7 | from . import Arm, Bernoulli, Binomial, UnboundedExponential, UnboundedGaussian, Constant, UnboundedPoisson 8 | except ImportError: 9 | from Arm import Arm 10 | from Bernoulli import Bernoulli 11 | from Binomial import Binomial 12 | from Exponential import UnboundedExponential 13 | from Gaussian import UnboundedGaussian 14 | from Constant import Constant 15 | from Poisson import UnboundedPoisson 16 | 17 | class RestedRottingArm(Arm): 18 | def __init__(self, decayingFunction, staticArm): 19 | self.decayingFunction = decayingFunction 20 | # decayingFunction provides the mean of the arm after n pulls, EXCEPT for truncated distributions where it is the mean of the untruncated distribution 21 | self.arm = staticArm 22 | self.pull_count = 0 23 | self.arm.set_mean_param(self.decayingFunction(self.pull_count)) 24 | self.mean = self.arm.mean 25 | 26 | def draw(self, t=None): 27 | self.arm.set_mean_param(self.decayingFunction(self.pull_count)) 28 | current_mean = self.mean 29 | self.mean = self.arm.mean 30 | draw = self.arm.draw(t) 31 | self.pull_count += 1 32 | self.arm.set_mean_param(self.decayingFunction(self.pull_count)) 33 | self.mean = self.arm.mean 34 | assert current_mean >= self.mean, "The mean of a rested rotting arm cannot increase." 
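        # Note: the two set_mean_param() calls above first refresh the underlying arm for the *current* pull
        # (the sample returned below), then advance it to the mean of the *next* pull; since a rested rotting
        # arm decays with each pull, the refreshed mean can never exceed the previous one (checked by the assert).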
35 | return draw 36 | 37 | 38 | class RestedRottingBernoulli(RestedRottingArm): 39 | def __init__(self, decayingFunction): 40 | arm = Bernoulli(0) 41 | super(RestedRottingBernoulli, self).__init__(decayingFunction, arm) 42 | 43 | 44 | class RestedRottingBinomial(RestedRottingArm): 45 | def __init__(self, decayingFunction, draws=1): 46 | arm = Binomial(0, draws) 47 | super(RestedRottingBinomial, self).__init__(decayingFunction, arm) 48 | 49 | 50 | class RestedRottingConstant(RestedRottingArm): 51 | def __init__(self, decayingFunction): 52 | arm = Constant(0) 53 | super(RestedRottingConstant, self).__init__(decayingFunction, arm) 54 | 55 | 56 | class RestedRottingExponential(RestedRottingArm): 57 | def __init__(self, decayingFunction): 58 | arm = UnboundedExponential(1) 59 | super(RestedRottingExponential, self).__init__(decayingFunction, arm) 60 | 61 | 62 | class RestedRottingGaussian(RestedRottingArm): 63 | def __init__(self, decayingFunction, sigma=1): 64 | arm = UnboundedGaussian(0, sigma) 65 | super(RestedRottingGaussian, self).__init__(decayingFunction, arm) 66 | 67 | 68 | class RestedRottingPoisson(RestedRottingArm): 69 | def __init__(self, decayingFunction, sigma=1): 70 | arm = UnboundedPoisson(0) 71 | super(RestedRottingPoisson, self).__init__(decayingFunction, arm) 72 | 73 | 74 | 75 | if __name__ == '__main__': 76 | rotting_bernoulli = RestedRottingBernoulli(lambda n: 0 if n > 10 else 1) 77 | rotting_gaussian = RestedRottingGaussian(lambda n: 0 if n > 10 else 1) 78 | print([rotting_gaussian.draw() for _ in range(50)]) 79 | print([rotting_bernoulli.draw() for _ in range(50)]) -------------------------------------------------------------------------------- /SMPyBandits/Arms/RestlessArm.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien Seznec 3 | Restless arm, i.e. arms with mean value which change at each round 4 | """ 5 | try: 6 | from . import Arm, Bernoulli, Binomial, UnboundedExponential, UnboundedGaussian, Constant, UnboundedPoisson 7 | except ImportError: 8 | from Arm import Arm 9 | from Bernoulli import Bernoulli 10 | from Binomial import Binomial 11 | from Exponential import UnboundedExponential 12 | from Gaussian import UnboundedGaussian 13 | from Constant import Constant 14 | from Poisson import UnboundedPoisson 15 | 16 | from math import sin 17 | 18 | 19 | class RestlessArm(Arm): 20 | def __init__(self, rewardFunction, staticArm): 21 | self.reward = rewardFunction 22 | # It provides the mean of the arm after n pulls. 
EXCEPT for truncated distributions where it is the mean of the untruncated distribution 23 | self.arm = staticArm 24 | self.mean = self.arm.mean 25 | 26 | def draw(self, t): 27 | self.arm.set_mean_param(self.reward(t)) 28 | self.mean = self.arm.mean 29 | draw = self.arm.draw(t) 30 | return draw 31 | 32 | 33 | class RestlessBernoulli(RestlessArm): 34 | def __init__(self, rewardFunction): 35 | arm = Bernoulli(0) 36 | super(RestlessBernoulli, self).__init__(rewardFunction, arm) 37 | 38 | 39 | class RestlessBinomial(RestlessArm): 40 | def __init__(self, rewardFunction, draws=1): 41 | arm = Binomial(0, draws) 42 | super(RestlessBinomial, self).__init__(rewardFunction, arm) 43 | 44 | 45 | class RestlessConstant(RestlessArm): 46 | def __init__(self, rewardFunction): 47 | arm = Constant(0) 48 | super(RestlessConstant, self).__init__(rewardFunction, arm) 49 | 50 | 51 | class RestlessExponential(RestlessArm): 52 | def __init__(self, rewardFunction): 53 | arm = UnboundedExponential(1) 54 | super(RestlessExponential, self).__init__(rewardFunction, arm) 55 | 56 | 57 | class RestlessGaussian(RestlessArm): 58 | def __init__(self, rewardFunction, sigma=1): 59 | arm = UnboundedGaussian(0, sigma) 60 | super(RestlessGaussian, self).__init__(rewardFunction, arm) 61 | 62 | 63 | class RestlessPoisson(RestlessArm): 64 | def __init__(self, rewardFunction, sigma=1): 65 | arm = UnboundedPoisson(0) 66 | super(RestlessPoisson, self).__init__(rewardFunction, arm) 67 | 68 | 69 | 70 | if __name__ == '__main__': 71 | restless_bernoulli = RestlessBernoulli(lambda x :sin(x)**2) 72 | restless_gaussian = RestlessGaussian(lambda x :sin(x)**2) 73 | restless_binomial = RestlessBinomial(lambda x :sin(x)**2, draws=10) 74 | print([sin(t)**2 for t in range(50)]) 75 | print([restless_gaussian.draw(t) for t in range(50)]) 76 | print([restless_bernoulli.draw(t) for t in range(50)]) 77 | print([restless_binomial.draw(t) for t in range(50)]) 78 | -------------------------------------------------------------------------------- /SMPyBandits/Arms/kullback.py: -------------------------------------------------------------------------------- 1 | ../Policies/kullback.py -------------------------------------------------------------------------------- /SMPyBandits/Arms/usenumba.py: -------------------------------------------------------------------------------- 1 | ../Policies/usenumba.py -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/.gitignore: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | SHELL=/usr/bin/env /bin/bash 3 | 4 | test2: 5 | gcc -o test `python2.7-config --cflags` test.c `python2.7-config --ldflags` 6 | 7 | test3: 8 | gcc -o test `python3.5-config --cflags` test.c `python3.5-config --ldflags` 9 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/README.md: -------------------------------------------------------------------------------- 1 | # C_Interface 2 | 3 | This folder contains some experiments to create a C++ binding from my Python framework. 4 | 5 | TL;DR: so far, it failed. I stopped trying. 
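One of the approaches tried below ([`test_sub.cpp`](test_sub.cpp)) simply drives a Python interpreter through a pipe and sends it plain Python commands. A rough Python-side sketch of the same protocol, useful for testing the commands before writing any C++ (it assumes the `SMPyBandits` package is installed for the `python3` found on the PATH):

```python
import subprocess

# Launch a Python interpreter and feed it commands on stdin, as test_sub.cpp does with cpp-subprocess.
proc = subprocess.Popen(["python3"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
commands = "\n".join([
    "from SMPyBandits.Policies import UCB",
    "policy = UCB(10)",        # a policy for 10 arms
    "policy.startGame()",
    "print(policy.choice())",  # ask the policy for an arm and print it on stdout
]) + "\n"
out, _ = proc.communicate(commands)
print("Arm chosen by the remote policy:", out.strip())
```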
6 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/Sample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | """ Test module to be called from C++""" 3 | 4 | # from __future__ import print_function 5 | 6 | def add(a, b): 7 | """ Returns the sum of two numbers.""" 8 | a, b = int(a), int(b) 9 | c = str(a + b) 10 | print("a = {} and b = {} and a + b = {}".format(a, b, c)) 11 | return c 12 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/test.cpp: -------------------------------------------------------------------------------- 1 | #include "iostream" 2 | #include "Python.h" 3 | 4 | 5 | int main(int argc, char* argv[]) { 6 | printf("Calling Python to find the sum of 2 and 2.\n"); 7 | 8 | // Initialize the Python interpreter. 9 | Py_Initialize(); 10 | 11 | // Create some Python objects that will later be assigned values. 12 | PyObject *pName, *pModule, *pDict, *pFunc, *pArgs, *pValue; 13 | 14 | // Convert the file name to a Python string. 15 | pName = PyString_FromString("Sample"); 16 | 17 | // Import the file as a Python module. 18 | pModule = PyImport_Import(pName); 19 | 20 | // Create a dictionary for the contents of the module. 21 | pDict = PyModule_GetDict(pModule); 22 | 23 | // Get the add method from the dictionary. 24 | pFunc = PyDict_GetItemString(pDict, "add"); 25 | 26 | // Create a Python tuple to hold the arguments to the method. 27 | pArgs = PyTuple_New(2); 28 | 29 | // Convert 2 to a Python integer. 30 | pValue = PyInt_FromLong(2); 31 | 32 | // Set the Python int as the first and second arguments to the method. 33 | PyTuple_SetItem(pArgs, 0, pValue); 34 | PyTuple_SetItem(pArgs, 1, pValue); 35 | 36 | // Call the function with the arguments. 37 | PyObject* pResult = PyObject_CallObject(pFunc, pArgs); 38 | 39 | // Print a message if calling the method failed. 40 | if (pResult == NULL) { 41 | printf("Calling the add method failed.\n"); 42 | } 43 | 44 | // Convert the result to a long from a Python object. 45 | long result = PyInt_AsLong(pResult); 46 | 47 | // Destroy the Python interpreter. 48 | Py_Finalize(); 49 | 50 | // Print the result. 
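    // Note: 'result' has type long, so the matching "%ld" conversion specifier is used in the printf below.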
51 | printf("The result is %d.\n", result); 52 | std::cin.ignore(); 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/test.py: -------------------------------------------------------------------------------- 1 | from Policies import * 2 | 3 | policy = UCB(10) 4 | print(policy) 5 | 6 | def choice(): 7 | result = policy.choice() 8 | return result 9 | 10 | def getReward(arm, reward): 11 | result = policy.getReward(arm, reward) 12 | return result 13 | -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/test2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace boost::python; 6 | 7 | int main(int, char **) { 8 | Py_Initialize(); 9 | 10 | try { 11 | object module = import("__main__"); 12 | object name_space = module.attr("__dict__"); 13 | exec_file("test.py", name_space, name_space); 14 | 15 | object choice = name_space["choice"]; 16 | object result = choice(); 17 | // result is a dictionary 18 | std::string val = extract(result["val"]); 19 | std::cout << val << std::endl; 20 | } 21 | catch (error_already_set) { 22 | PyErr_Print(); 23 | } 24 | 25 | Py_Finalize(); 26 | return 0; 27 | } -------------------------------------------------------------------------------- /SMPyBandits/C_Interface/test_sub.cpp: -------------------------------------------------------------------------------- 1 | // #! g++ -std=c++11 -Iinclude -o test_sub.exe test_sub.cpp -pthread 2 | /** 3 | Test of https://github.com/arun11299/cpp-subprocess 4 | 5 | - Author: Lilian Besson 6 | - License: MIT License (https://lbesson.mit-license.org/) 7 | - Date: 09-08-2017 8 | - Online: https://smpybandits.github.io/ 9 | - Reference: https://github.com/arun11299/cpp-subprocess 10 | */ 11 | 12 | // Include libraries 13 | #include // streams, <<, >> 14 | #include // strlen 15 | #include 16 | #include "subprocess.hpp" // From https://github.com/arun11299/cpp-subprocess 17 | 18 | // Macros to send a message 19 | #define send(msg) p.send(msg, strlen(msg)) 20 | #define communicate(msg) p.communicate(msg, strlen(msg)) 21 | 22 | 23 | int main() { 24 | namespace sp = subprocess; 25 | 26 | // auto p = sp::Popen({"python3"}, sp::input{sp::PIPE}); 27 | auto p = sp::Popen({"python3"}, sp::input{sp::PIPE}, sp::output{sp::PIPE}); 28 | auto input = p.input(); 29 | auto output = p.output(); 30 | 31 | // Import all the policies 32 | send("from Policies import *\n"); 33 | // std::cout << output.buf.data() << std::endl; 34 | 35 | // Create the policy 36 | send("policy = UCBalpha(10, alpha=0.5)\n"); 37 | // std::cout << output.buf.data() << std::endl; 38 | 39 | // Print it 40 | send("print(policy)\n"); 41 | // std::cout << output.buf.data() << std::endl; 42 | 43 | // Print it 44 | send("print(policy)\n"); 45 | // std::cout << output.buf.data() << std::endl; 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/MAB_rotting.py: -------------------------------------------------------------------------------- 1 | """ 2 | author : Julien SEZNEC 3 | Code to launch (rotting) bandit games. 4 | It is code in a functional programming way : each execution return arrays related to each run. 
5 | """ 6 | 7 | import time 8 | import numpy as np 9 | import logging 10 | from joblib import Parallel, delayed 11 | 12 | REPETITIONS = 1000 13 | HORIZON = 10000 14 | 15 | def repetedRuns(policy, arms, rep = REPETITIONS, T = HORIZON, parallel = True, oracle = False): 16 | rew = np.empty(shape = (rep, T)) 17 | noisy_rew = np.empty(shape = (rep, T)) 18 | time = np.empty(shape = (rep, T)) 19 | pulls = np.empty(shape=(rep, T)) 20 | cumul_pulls = np.empty(shape=(rep, len(arms))) 21 | if parallel: 22 | res = Parallel(n_jobs=parallel)(delayed(singleRun)(policy,arms, T, r, oracle) for r in range(rep)) 23 | else: 24 | res = [singleRun(policy,arms, T=T) for _ in range(rep)] 25 | rew[:, :] = np.array([r['cumul'] for r in res ]) 26 | noisy_rew[:, :] = np.array([r['noisy_cumul'] for r in res]) 27 | time[:, :] = np.array([r['time'] for r in res ]) 28 | pulls[:,:] = np.array([r['pulls'] for r in res ]) 29 | cumul_pulls[:,:] = np.array([r['cumul_pulls'] for r in res ]) 30 | return rew, noisy_rew, time, pulls, cumul_pulls 31 | 32 | def singleRun(policy, arms, T = HORIZON,rep_index = 0, oracle=False): 33 | myArms = [arm[0](**arm[1]) for arm in arms] 34 | if oracle: 35 | policy[1]['arms'] = myArms 36 | myPolicy = policy[0](len(myArms), **policy[1]) 37 | myPolicy.startGame() 38 | logging.debug(str(rep_index) + ' ' + myPolicy.__str__()) 39 | res = play(myArms, myPolicy, T, Oracle=oracle) 40 | return { 41 | 'cumul': np.array(res['rewards']).cumsum(), 42 | 'noisy_cumul': np.array(res['noisy_rewards']), 43 | 'time' : np.array(res['time']), 44 | 'pulls' : np.array(res['pulls']), 45 | 'cumul_pulls' : np.array(res['cumul_pulls']) 46 | } 47 | 48 | 49 | def play(arms, policy, T, Oracle= False): 50 | noisy_rewards = [] 51 | rewards = [] 52 | times = [] 53 | pulls = [] 54 | cumul_pulls = [0 for _ in range(len(arms))] 55 | for t in range(T): 56 | start = time.time() 57 | choice = policy.choice() 58 | reward = arms[choice].mean 59 | noisy_reward = arms[choice].draw(t) if not Oracle else arms[choice].oracle_draw(t) 60 | policy.getReward(choice, noisy_reward) 61 | times.append(time.time() - start) 62 | noisy_rewards.append(noisy_reward) 63 | rewards.append(reward) 64 | pulls.append(choice) 65 | cumul_pulls[choice] += 1 66 | return {'rewards': rewards, 'noisy_rewards': noisy_rewards, 'time': times, 'pulls': pulls, 'cumul_pulls' : cumul_pulls} -------------------------------------------------------------------------------- /SMPyBandits/Environment/README.md: -------------------------------------------------------------------------------- 1 | # [Environments](https://smpybandits.github.io/docs/Environment.html) 2 | > See here the documentation: [docs/Environment](https://smpybandits.github.io/docs/Environment.html) 3 | 4 | - [`MAB`](MAB.py), [`MarkovianMAB`](MarkovianMAB.py), [`DynamicMAB`](DynamicMAB.py) and [`IncreasingMAB`](IncreasingMAB.py) objects, used to wrap the problems (list of arms). 5 | - [`Result`](Result.py) and [`ResultMultiPlayers`](ResultMultiPlayers.py) objects, used to wrap simulation results (list of decisions and rewards). 6 | - [`Evaluator`](Evaluator.py) environment, used to wrap simulation, for the single player case. 7 | - [`EvaluatorMultiPlayers`](EvaluatorMultiPlayers.py) environment, used to wrap simulation, for the multi-players case. 8 | - [`EvaluatorSparseMultiPlayers`](EvaluatorSparseMultiPlayers.py) environment, used to wrap simulation, for the multi-players case with sparse activated players. 9 | - [`CollisionModels`](CollisionModels.py) implements different collision models. 
10 | 11 | And useful constants and functions for the plotting and stuff are in the [`__init__.py`](__init__.py) file: 12 | 13 | - `DPI`, `signature`, `maximizeWindow`, `palette`, `makemarkers`, `wraptext`: for plotting 14 | - `notify`: send a desktop notification 15 | - `Parallel`, `delayed`: joblib related 16 | - `tqdm`: pretty range() loops 17 | - `sortedDistance`, `fairnessMeasures`: science related -------------------------------------------------------------------------------- /SMPyBandits/Environment/Result.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Result.Result class to wrap the simulation results.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.9" 7 | 8 | import numpy as np 9 | 10 | 11 | class Result(object): 12 | """ Result accumulators.""" 13 | 14 | # , delta_t_save=1): 15 | def __init__(self, nbArms, horizon, indexes_bestarm=-1, means=None): 16 | """ Create ResultMultiPlayers.""" 17 | # self._means = means # Keep the means for ChangingAtEachRepMAB cases 18 | # self.delta_t_save = delta_t_save #: Sample rate for saving. 19 | self.choices = np.zeros(horizon, dtype=int) #: Store all the choices. 20 | self.rewards = np.zeros(horizon) #: Store all the rewards, to compute the mean. 21 | self.pulls = np.zeros(nbArms, dtype=int) #: Store the pulls. 22 | if means is not None: 23 | indexes_bestarm = np.nonzero(np.isclose(means, np.max(means)))[0] 24 | indexes_bestarm = np.asarray(indexes_bestarm) 25 | if np.size(indexes_bestarm) == 1: 26 | indexes_bestarm = np.asarray([indexes_bestarm]) 27 | self.indexes_bestarm = [ indexes_bestarm for _ in range(horizon)] #: Store also the position of the best arm, XXX in case of dynamically switching environment. 28 | self.running_time = -1 #: Store the running time of the experiment. 29 | self.memory_consumption = -1 #: Store the memory consumption of the experiment. 30 | self.number_of_cp_detections = 0 #: Store the number of change point detected during the experiment. 31 | 32 | def store(self, time, choice, reward): 33 | """ Store results.""" 34 | self.choices[time] = choice 35 | self.rewards[time] = reward 36 | self.pulls[choice] += 1 37 | 38 | def change_in_arms(self, time, indexes_bestarm): 39 | """ Store the position of the best arm from this list of arm. 40 | 41 | - From that time t **and after**, the index of the best arm is stored as ``indexes_bestarm``. 42 | 43 | .. warning:: FIXME This is still experimental! 44 | """ 45 | for t in range(time, len(self.indexes_bestarm)): 46 | self.indexes_bestarm[t] = indexes_bestarm 47 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/ResultMultiPlayers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ ResultMultiPlayers.ResultMultiPlayers class to wrap the simulation results, for the multi-players case.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.9" 7 | 8 | import numpy as np 9 | 10 | 11 | class ResultMultiPlayers(object): 12 | """ ResultMultiPlayers accumulators, for the multi-players case. 
""" 13 | 14 | # , delta_t_save=1 15 | def __init__(self, nbArms, horizon, nbPlayers, means=None): 16 | """ Create ResultMultiPlayers.""" 17 | # self._means = means # Keep the means for ChangingAtEachRepMAB cases 18 | self.choices = np.zeros((nbPlayers, horizon), dtype=int) #: Store all the choices of all the players 19 | self.rewards = np.zeros((nbPlayers, horizon)) #: Store all the rewards of all the players, to compute the mean 20 | # self.rewardsSquared = np.zeros((nbPlayers, horizon)) #: Store all the rewards**2 of all the players, to compute the variance # XXX uncomment if needed 21 | self.pulls = np.zeros((nbPlayers, nbArms), dtype=int) #: Store the pulls of all the players 22 | self.allPulls = np.zeros((nbPlayers, nbArms, horizon), dtype=int) #: Store all the pulls of all the players 23 | self.collisions = np.zeros((nbArms, horizon), dtype=int) #: Store the collisions on all the arms 24 | self.running_time = -1 #: Store the running time of the experiment 25 | self.memory_consumption = -1 #: Store the memory consumption of the experiment 26 | 27 | def store(self, time, choices, rewards, pulls, collisions): 28 | """ Store results.""" 29 | self.choices[:, time] = choices 30 | self.rewards[:, time] = rewards 31 | # self.rewardsSquared[:, time] = rewards ** 2 # XXX uncomment if needed 32 | self.pulls += pulls 33 | self.allPulls[:, :, time] = pulls 34 | self.collisions[:, time] = collisions 35 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/StrategicResult.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Result.Result class to wrap the simulation results.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "SlyJabiru" 6 | __version__ = "0.1" 7 | 8 | import numpy as np 9 | 10 | 11 | class StrategicResult(object): 12 | """ Result accumulators.""" 13 | 14 | # , delta_t_save=1): 15 | def __init__(self, nbArms, horizon, nbArmsPerAgents, means, bestArmMean, 16 | indexes_bestarm=-1): 17 | """ Create ResultMultiPlayers.""" 18 | # self._means = means # Keep the means for ChangingAtEachRepMAB cases 19 | # self.delta_t_save = delta_t_save #: Sample rate for saving. 20 | self.means = means 21 | self.bestArmMean = bestArmMean 22 | 23 | self.choices = np.zeros(horizon, dtype=int) #: Store all the choices. 24 | self.rewards = np.zeros(horizon) #: Store all the rewards, to compute the mean. 25 | self.pulls = np.zeros(nbArms, dtype=int) #: Store the pulls. 26 | self.instantRegrets = np.zeros(horizon) 27 | 28 | self.nbArmsPerAgents = nbArmsPerAgents 29 | self.agentChoices = np.zeros(horizon, dtype=int) 30 | self.agentChosenNb = np.zeros(len(nbArmsPerAgents), dtype=int) 31 | 32 | self.rewardsPerArms = np.zeros(nbArms) 33 | self.rewardsPerAgents = np.zeros(len(nbArmsPerAgents)) 34 | 35 | # if means is not None: 36 | # indexes_bestarm = np.nonzero(np.isclose(means, np.max(means)))[0] 37 | # indexes_bestarm = np.asarray(indexes_bestarm) 38 | # if np.size(indexes_bestarm) == 1: 39 | # indexes_bestarm = np.asarray([indexes_bestarm]) 40 | # self.indexes_bestarm = [ indexes_bestarm for _ in range(horizon)] #: Store also the position of the best arm, XXX in case of dynamically switching environment. 41 | # self.running_time = -1 #: Store the running time of the experiment. 42 | # self.memory_consumption = -1 #: Store the memory consumption of the experiment. 
43 | # self.number_of_cp_detections = 0 #: Store the number of change point detected during the experiment. 44 | 45 | def store(self, time, choice, reward): 46 | """ Store results.""" 47 | self.choices[time] = choice # Which arm was pulled? 48 | self.rewards[time] = reward 49 | self.pulls[choice] += 1 # How many times has each arm been pulled? 50 | self.instantRegrets[time] = self.bestArmMean - self.means[choice] 51 | 52 | armPossession = np.cumsum(self.nbArmsPerAgents) - 1 53 | temp = (armPossession >= choice) 54 | agent = np.where(temp)[0][0] 55 | 56 | self.agentChoices[time] = agent 57 | self.agentChosenNb[agent] += 1 58 | 59 | # Accumulate the per-agent rewards here, 60 | # so that the strategic evaluator can simply read them. 61 | self.rewardsPerArms[choice] += reward 62 | self.rewardsPerAgents[agent] += reward 63 | 64 | 65 | # def change_in_arms(self, time, indexes_bestarm): 66 | # """ Store the position of the best arm from this list of arm. 67 | 68 | # - From that time t **and after**, the index of the best arm is stored as ``indexes_bestarm``. 69 | 70 | # .. warning:: FIXME This is still experimental! 71 | # """ 72 | # for t in range(time, len(self.indexes_bestarm)): 73 | # self.indexes_bestarm[t] = indexes_bestarm 74 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ ``Environment`` module: 3 | 4 | - :class:`MAB`, :class:`MarkovianMAB`, :class:`ChangingAtEachRepMAB`, :class:`IncreasingMAB`, :class:`PieceWiseStationaryMAB`, :class:`NonStationaryMAB` objects, used to wrap the problems (essentially a list of arms). 5 | - :class:`Result` and :class:`ResultMultiPlayers` objects, used to wrap simulation results (list of decisions and rewards). 6 | - :class:`Evaluator` environment, used to wrap simulation, for the single player case. 7 | - :class:`EvaluatorMultiPlayers` environment, used to wrap simulation, for the multi-players case. 8 | - :class:`EvaluatorSparseMultiPlayers` environment, used to wrap simulation, for the multi-players case with sparse activated players. 9 | - :mod:`CollisionModels` implements different collision models. 10 | 11 | And useful constants and functions for the plotting and stuff: 12 | 13 | - :data:`DPI`, :func:`signature`, :func:`maximizeWindow`, :func:`palette`, :func:`makemarkers`, :func:`wraptext`: for plotting, 14 | - :func:`notify`: send a desktop notification, 15 | - :func:`Parallel`, :func:`delayed`: joblib related, 16 | - :mod:`tqdm`: pretty range() loops, 17 | - :mod:`sortedDistance`, :mod:`fairnessMeasures`: science related, 18 | - :func:`getCurrentMemory`, :func:`sizeof_fmt`: to measure and pretty print memory consumption. 
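These re-exports degrade gracefully when the optional dependencies are missing; for instance (a small sketch, the function is only an example):

    from SMPyBandits.Environment import Parallel, delayed, tqdm

    def square(x):
        return x ** 2

    # With joblib installed this runs in 2 processes; otherwise the fallback simply evaluates the generator.
    results = Parallel(n_jobs=2)(delayed(square)(i) for i in tqdm(range(100)))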
19 | """ 20 | from __future__ import division, print_function # Python 2 compatibility 21 | 22 | __author__ = "Lilian Besson" 23 | __version__ = "0.9" 24 | 25 | from .MAB import MAB, MarkovianMAB, ChangingAtEachRepMAB, IncreasingMAB, PieceWiseStationaryMAB, NonStationaryMAB 26 | 27 | from .Result import Result 28 | from .Evaluator import Evaluator 29 | from .StrategicEvaluator import StrategicEvaluator 30 | 31 | from .CollisionModels import * 32 | from .ResultMultiPlayers import ResultMultiPlayers 33 | from .EvaluatorMultiPlayers import EvaluatorMultiPlayers 34 | from .EvaluatorSparseMultiPlayers import EvaluatorSparseMultiPlayers 35 | 36 | from .plotsettings import DPI, signature, maximizeWindow, palette, makemarkers, wraptext 37 | 38 | from .notify import notify 39 | 40 | from .usejoblib import USE_JOBLIB, Parallel, delayed 41 | from .usetqdm import USE_TQDM, tqdm 42 | 43 | from .sortedDistance import weightedDistance, manhattan, kendalltau, spearmanr, gestalt, meanDistance, sortedDistance 44 | from .fairnessMeasures import amplitude_fairness, std_fairness, rajjain_fairness, mo_walrand_fairness, mean_fairness, fairnessMeasure, fairness_mapping 45 | 46 | from .memory_consumption import getCurrentMemory, sizeof_fmt, start_tracemalloc, display_top_tracemalloc 47 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/plot_Cmu_HOI.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Plot the C(mu) Lai & Robbins term and the HOI(mu) OI factor for various Bernoulli MAB problem.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.6" 7 | 8 | from itertools import product 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | 12 | # Local imports 13 | from sys import path; path.insert(0, '..') 14 | try: 15 | from .usenumba import jit 16 | from .usetqdm import tqdm 17 | from .plotsettings import maximizeWindow, legend 18 | from .Arms import * 19 | except ImportError: 20 | from usenumba import jit 21 | from usetqdm import tqdm 22 | from plotsettings import maximizeWindow, legend 23 | from Arms import * 24 | 25 | oneLR = Bernoulli.oneLR 26 | oneHOI = Bernoulli.oneHOI 27 | 28 | 29 | @jit 30 | def cmu(mu): 31 | """One LR term for Bernoulli problems.""" 32 | best = max(mu) 33 | return sum(oneLR(best, m) for m in mu if m != best) 34 | 35 | 36 | @jit 37 | def oi(mu): 38 | """One HOI term for Bernoulli problems.""" 39 | best = max(mu) 40 | return sum(oneHOI(best, m) for m in mu if m != best) / float(len(mu)) 41 | 42 | 43 | def addit(c, o, mu): 44 | """Add cmu(mu) to c and o(mu) to c if mu are not all equal.""" 45 | if len(set(mu)) > 1: 46 | c.append(cmu(mu)) 47 | o.append(oi(mu)) 48 | 49 | 50 | def main(K, N=50000, T=10): 51 | """Plot.""" 52 | print("Starting for K =", K) 53 | 54 | c1, o1 = [], [] 55 | for _ in tqdm(range(N), desc="Uniformly random (%d)" % N): 56 | mu = np.random.random(K) 57 | addit(c1, o1, mu) 58 | print("c: min =", min(c1), "max =", max(c1)) 59 | print("o: min =", min(o1), "max =", max(o1)) 60 | 61 | c2, o2 = [], [] 62 | for _ in tqdm(range(N), desc="Gaussian (%d)" % N): 63 | mu = np.minimum(1, np.maximum(0, np.random.normal(loc=0.5, scale=0.2, size=K))) 64 | addit(c2, o2, mu) 65 | print("c: min =", min(c2), "max =", max(c2)) 66 | print("o: min =", min(o2), "max =", max(o2)) 67 | 68 | c3, o3 = [], [] 69 | for mu in tqdm(product(np.linspace(0, 1, T), repeat=K), desc="Evenly 
spaced (%d)" % (T**K)): 70 | addit(c3, o3, mu) 71 | print("c: min =", min(c3), "max =", max(c3)) 72 | print("o: min =", min(o3), "max =", max(o3)) 73 | 74 | # for method in [plt.plot, plt.semilogx]: 75 | for method in [plt.semilogx]: 76 | plt.figure() 77 | method(c1, o1, 'o', ms=2, label="Uniform") 78 | method(c2, o2, 'x', ms=2, label="Gaussian") 79 | method(c3, o3, 'd', ms=2, label="Evenly spaced") 80 | legend() 81 | plt.xlabel(r"Lai & Robbins complexity constant, $C_{\mu}$") 82 | plt.ylabel(r"Navikkumar Modi HOI factor, $H_{OI}(\mu)$") 83 | plt.title("Comparison of two complexity criteria, for Bernoulli MAB problems, with $K = {}$ arms.".format(K)) 84 | maximizeWindow() 85 | plt.show() 86 | 87 | 88 | if __name__ == '__main__': 89 | for K in [3, 5, 7]: 90 | main(K) 91 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/usejoblib.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Import Parallel and delayed from joblib, safely. 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.9" 8 | 9 | try: 10 | from joblib import Parallel, delayed 11 | USE_JOBLIB = True 12 | except ImportError: 13 | print("Warning: joblib not found. Install it from pypi ('pip install joblib') or conda.\n Info: Not mandatory, but it improves computation speed on multi-core machines.") 14 | USE_JOBLIB = False 15 | 16 | # In case the code uses Parallel and delayed, even if USE_JOBLIB is False 17 | def Parallel(*args, **kwargs): 18 | """Fake joblib.Parallel implementation.""" 19 | def fakeParallelWrapper(iterator): 20 | """ Just a list(iterator).""" 21 | return list(iterator) 22 | return fakeParallelWrapper 23 | 24 | def delayed(f, *args, **kwargs): 25 | """Fake joblib.delayed implementation.""" 26 | return f 27 | 28 | 29 | # Only export and expose the useful functions defined here 30 | __all__ = [ 31 | "USE_JOBLIB", 32 | "Parallel", 33 | "delayed" 34 | ] 35 | -------------------------------------------------------------------------------- /SMPyBandits/Environment/usenumba.py: -------------------------------------------------------------------------------- 1 | ../Policies/usenumba.py -------------------------------------------------------------------------------- /SMPyBandits/Environment/usetqdm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Import tqdm from tqdm, safely. 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.9" 8 | 9 | 10 | def in_notebook(): 11 | """Check if the code is running inside a Jupyter notebook or not. Cf. http://stackoverflow.com/a/39662359/. 12 | 13 | >>> in_notebook() 14 | False 15 | """ 16 | try: 17 | shell = get_ipython().__class__.__name__ 18 | if shell == 'ZMQInteractiveShell': # Jupyter notebook or qtconsole? 19 | return True 20 | elif shell == 'TerminalInteractiveShell': # Terminal running IPython? 21 | return False 22 | else: 23 | return False # Other type (?) 
24 | except NameError: 25 | return False # Probably standard Python interpreter 26 | 27 | 28 | try: 29 | if in_notebook(): 30 | from tqdm.notebook import tqdm 31 | print("Info: Using the Jupyter notebook version of the tqdm() decorator, tqdm_notebook() ...") # DEBUG 32 | else: 33 | from tqdm import tqdm 34 | # print("Info: Using the regular tqdm() decorator ...") # DEBUG 35 | USE_TQDM = True 36 | except ImportError: 37 | print("Warning: tqdm not found. Install it from pypi ('pip install tqdm') or conda.\n Info: Not mandatory, but it's pretty!") 38 | USE_TQDM = False 39 | 40 | def tqdm(iterator, *args, **kwargs): 41 | """Fake tqdm.tqdm wrapper, ignore **kwargs like desc='...', and return iterator.""" 42 | return iterator 43 | 44 | 45 | # Only export and expose the useful functions defined here 46 | __all__ = [ 47 | "USE_TQDM", 48 | "tqdm", 49 | ] 50 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig1/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig1/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig2/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig2/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig3/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig3/plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien SEZNEC 3 | Plot utility to reproduce Figure 3 of [Seznec et al., 2019a] 4 | Reference: [Seznec et al., 2019a] 5 | Rotting bandits are not harder than stochastic ones; 6 | Julien Seznec, Andrea Locatelli, Alexandra Carpentier, Alessandro Lazaric, Michal Valko ; 7 | Proceedings of Machine Learning Research, PMLR 89:2564-2572, 2019. 
8 | http://proceedings.mlr.press/v89/seznec19a.html 9 | https://arxiv.org/abs/1811.11043 (updated version) 10 | """ 11 | from matplotlib import pyplot as plt 12 | from SMPyBandits.Policies import FEWA, UCB 13 | import os 14 | import numpy as np 15 | 16 | plt.style.use('seaborn-colorblind') 17 | plt.style.use('style.mplstyle') 18 | 19 | 20 | 21 | def fig3(data, delta , name='fig3A.pdf', ylim=300): 22 | # -------------- PLOT -------------- 23 | fig, ax = plt.subplots(figsize=(12, 10)) 24 | for i, policy in enumerate(data): 25 | print(data[policy]["mean"]) 26 | X = range(data[policy]["mean"].shape[0]) 27 | ax.plot(X, data[policy]["mean"], label=policy, linewidth=3) 28 | color = ax.get_lines()[-1].get_c() 29 | ax.plot(X, data[policy]["uppq"], label=None, linestyle='--', color=color, linewidth=1) 30 | ax.plot(X, data[policy]["lowq"], label=None, linestyle='--', color=color, linewidth=1) 31 | plt.fill_between(X, data[policy]["uppq"], data[policy]["lowq"], alpha=.05, color=color) 32 | plt.xlim(0,5000) 33 | plt.ylim(0, ylim) 34 | plt.legend(prop={'variant': 'small-caps'}) 35 | plt.xlabel('Round ($t$)') 36 | plt.ylabel('Average regret $R_t$') 37 | ax.xaxis.set_label_coords(0.5, -0.08) 38 | ax.yaxis.set_label_coords(-0.09, 0.5) 39 | plt.title('$\Delta = {:.3g}$'.format(delta), y=1.04) 40 | # -------------- SAVE -------------- 41 | plt.savefig(name) 42 | 43 | 44 | if __name__ == "__main__": 45 | for game in range(1,3): 46 | policies = [ 47 | [FEWA, {'alpha': .01, 'delta': 1, 'subgaussian': 1}], 48 | [FEWA, {'alpha': .06, 'delta': 1, 'subgaussian': 1}], 49 | [FEWA, {'alpha': 0.25, 'delta': 1, 'subgaussian': 1}], 50 | [UCB, {}] 51 | ] 52 | data = {} 53 | for policy in policies: 54 | policy_name = str(policy[0](nbArms=2, **policy[1])) 55 | policy_name_nospace = policy_name.replace(' ', '_') 56 | policy_data = [ 57 | np.load(os.path.join('./data', file)) for file in os.listdir('./data') if 58 | file.startswith("REGRET%s_"%game + policy_name_nospace) 59 | ] 60 | if not policy_data: 61 | continue 62 | policy_data_array = np.concatenate(policy_data, axis=0) 63 | print(len(policy_data), policy_data_array.shape) 64 | data[policy_name] = { 65 | "mean": policy_data_array.mean(axis=0), 66 | "uppq": np.quantile(policy_data_array, 0.9, axis=0), 67 | "lowq": np.quantile(policy_data_array, 0.1, axis=0) 68 | } 69 | 70 | fig3(data, delta=0.14 if game == 1 else 1, name='fig3%s.pdf'%game) 71 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19a_Fig3/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19b_Fig1/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | Reward/ 3 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec19b_Fig1/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | 
font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_EFF/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_EFF/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien SEZNEC 3 | Produce the experiment about the efficiency of EFF_RAWUCB 4 | For the thesis manuscript. 5 | """ 6 | 7 | from SMPyBandits.Arms import RestedRottingGaussian 8 | from SMPyBandits.Policies import GreedyOracle, RAWUCB, EFF_RAWUCB, wSWA 9 | from SMPyBandits.Environment.MAB_rotting import repetedRuns 10 | import numpy as np 11 | import datetime 12 | import os 13 | import logging 14 | import sys 15 | 16 | date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f") 17 | PARALLEL = -1 # Set positive int to indicate the number of core, -1 to use all the cores, and False to not parallelize 18 | REPETITIONS = 1 if len(sys.argv) < 3 else int(sys.argv[2]) # Set the number of repetitions 19 | HORIZON = T = 10**6 # Horizon T 20 | sigma = 1 # Gaussian noise std 21 | K = 2 22 | mu = 0.1 23 | 24 | ### SET Policies 25 | policies = [ 26 | [RAWUCB, {'alpha': 1.4}], # 0 27 | [EFF_RAWUCB, {'alpha': 1.4, 'm':2}], # 1 28 | [wSWA, {'alpha': 0.002}], # 2 29 | [wSWA, {'alpha': 0.02}], # 3 30 | [wSWA, {'alpha': 0.2}], # 4 31 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.01}], # 5 32 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.1}], # 6 33 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.2}], # 7 34 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.3}], # 8 35 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.5}], # 9 36 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.9}], # 10 37 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 2.1}], # 11 38 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 3}], # 12 39 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 10}], # 13 40 | ] 41 | policy_ind = 10 if len(sys.argv) == 1 else int(sys.argv[1]) 42 | policy = policies[policy_ind] 43 | policy_name = str(policy[0](nbArms=2, **policy[1])) 44 | policy_name_nospace = policy_name.replace(' ', '_') 45 | 46 | regret_path = os.path.join('./data', 'REGRET_' + policy_name_nospace + '_' + date) 47 | time_path = os.path.join('./data', 'TIME_' + policy_name_nospace + '_' + date) 48 | os.makedirs('./data/logging/', exist_ok=True) 49 | logging.basicConfig(filename=os.path.join('./data/logging', date + '.log'), level=logging.INFO, 50 | format='%(asctime)s %(message)s') 51 | logging.info("Policy : %s$" % (policy_name)) 52 | 53 | ### SET L/2 54 | logging.info("CONFIG : CPU %s" % os.cpu_count()) 55 | logging.info("CONFIG : REPETITIONS %s" % REPETITIONS) 56 | logging.info("CONFIG : HORIZON %s" % HORIZON) 57 | logging.info("CONFIG : SIGMA %s" % sigma) 58 | logging.info("CONFIG : $\mu = %s$" % mu) 59 | 60 | noisy_reward_res = [] 61 | regret_res = [] 62 | time_res = [] 63 | overpull_res = [] 64 | ### SET K arms 65 | arms = [ 66 | [ 67 | RestedRottingGaussian, 68 | {'decayingFunction': lambda n: mu if n <= HORIZON / 4 else -mu, 'sigma': sigma, } 69 | ], 70 | [ 71 | RestedRottingGaussian, 72 | {'decayingFunction': lambda n: 0, 'sigma': sigma, 
} 73 | ], 74 | ] 75 | rew, noisy_rew, time, pulls, cumul_pulls = repetedRuns(policy, arms, rep=REPETITIONS, T=HORIZON, parallel=PARALLEL) 76 | oracle_rew, noisy_oracle_rew, oracle_time, oracle_pull, oracle_cumul_pulls = repetedRuns( 77 | [GreedyOracle, {}], arms, rep=1, T=HORIZON, oracle=True 78 | ) 79 | regret = oracle_rew - rew 80 | regret_res.append(regret) 81 | time_res.append(time) 82 | logging.info("EVENT : SAVING ... ") 83 | np.save(regret_path, np.array(regret_res)) 84 | np.save(time_path, np.array(time_res)) 85 | logging.info("EVENT : END ... ") 86 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_EFF/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_asymptotic/.gitignore: -------------------------------------------------------------------------------- 1 | data/ -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_asymptotic/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien SEZNEC 3 | Produce the experiment about the (potential) asymptotic optimality of RAW-UCB++ 4 | For the thesis manuscript. 
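    Usage sketch (added note, inferred from the argument parsing below and not part of the original docstring): run ``python main.py [policy_index] [repetitions]``, where ``policy_index`` selects one entry of the ``policies`` list (default 9) and ``repetitions`` sets ``REPETITIONS`` (default 1).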
5 | """ 6 | 7 | from SMPyBandits.Arms import RestedRottingGaussian, UnboundedGaussian as Gaussian 8 | from SMPyBandits.Policies import GreedyOracle, RAWUCB, EFF_RAWUCB, EFF_RAWUCB_pp, MOSSAnytime, UCB 9 | from SMPyBandits.Environment.MAB_rotting import repetedRuns 10 | import numpy as np 11 | import datetime 12 | import os 13 | import logging 14 | import sys 15 | 16 | date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f") 17 | PARALLEL = -1 # Set positive int to indicate the number of core, -1 to use all the cores, and False to not parallelize 18 | REPETITIONS = 1 if len(sys.argv) < 3 else int(sys.argv[2]) # Set the number of repetitions 19 | HORIZON = T = 10**6 # Horizon T 20 | sigma = 1 # Gaussian noise std 21 | K = 2 22 | 23 | ### SET Policies 24 | policies = [ 25 | [MOSSAnytime, {'alpha':3}], #0 26 | [EFF_RAWUCB, {'alpha': 1.4, 'm': 1.01}], # 1 27 | [EFF_RAWUCB_pp, {'beta': 0, 'm': 1.01}], # 2 28 | [EFF_RAWUCB_pp, {'beta': 1, 'm': 1.01}], # 3 29 | [EFF_RAWUCB_pp, {'beta': 2, 'm': 1.01}], # 4 30 | [EFF_RAWUCB_pp, {'beta': 3, 'm': 1.01}], # 5 31 | [UCB, {}], #6 32 | [EFF_RAWUCB_pp, {'beta': 2.5, 'm': 1.01}], # 7 33 | [EFF_RAWUCB_pp, {'beta': 3.5, 'm': 1.01}], # 8 34 | [EFF_RAWUCB_pp, {'alpha': 1.3, 'm': 1.01}], # 9 35 | [EFF_RAWUCB_pp, {'alpha': 1.4, 'm': 1.01}], # 10 36 | [EFF_RAWUCB_pp, {'alpha': 1.5, 'm': 1.01}], # 11 37 | [EFF_RAWUCB_pp, {'alpha': 1.7, 'm': 1.01}], # 12 38 | ] 39 | policy_ind = 9 if len(sys.argv) == 1 else int(sys.argv[1]) 40 | policy = policies[policy_ind] 41 | policy_name = str(policy[0](nbArms=2, **policy[1])) 42 | policy_name_nospace = policy_name.replace(' ', '_') 43 | 44 | regret_path = os.path.join('./data', 'REGRET_' + policy_name_nospace + '_' + date) 45 | time_path = os.path.join('./data', 'TIME_' + policy_name_nospace + '_' + date) 46 | os.makedirs('./data/logging/', exist_ok=True) 47 | logging.basicConfig(filename=os.path.join('./data/logging', date + '.log'), level=logging.INFO, 48 | format='%(asctime)s %(message)s') 49 | logging.info("Policy : %s$" % (policy_name)) 50 | 51 | ### SET L/2 52 | mus = [0.01, 1] 53 | logging.info("CONFIG : CPU %s" % os.cpu_count()) 54 | logging.info("CONFIG : REPETITIONS %s" % REPETITIONS) 55 | logging.info("CONFIG : HORIZON %s" % HORIZON) 56 | logging.info("CONFIG : SIGMA %s" % sigma) 57 | 58 | noisy_reward_res = [] 59 | regret_res = [] 60 | time_res = [] 61 | overpull_res = [] 62 | for m, mu in enumerate(mus): 63 | logging.info("GAME %s : $\mu = %s$" % (m, mu)) 64 | print(mu) 65 | ### SET K arms 66 | arms = [ 67 | [Gaussian, {"mu":0, "sigma": sigma}], 68 | [Gaussian, {"mu":mu, "sigma": sigma}] 69 | ] 70 | rew, noisy_rew, time, pulls, cumul_pulls = repetedRuns(policy, arms, rep=REPETITIONS, T=HORIZON, parallel=PARALLEL) 71 | oracle_rew, noisy_oracle_rew, oracle_time, oracle_pull, oracle_cumul_pulls = repetedRuns( 72 | [GreedyOracle, {}], arms, rep=1, T=HORIZON, oracle=True 73 | ) 74 | regret = oracle_rew - rew 75 | regret_res.append(regret) 76 | # time_res.append(time) 77 | logging.info("EVENT : SAVING ... ") 78 | np.save(regret_path, np.array(regret_res)) 79 | logging.info("EVENT : END ... 
") 80 | -------------------------------------------------------------------------------- /SMPyBandits/Experiment/Seznec_asymptotic/style.mplstyle: -------------------------------------------------------------------------------- 1 | xtick.labelsize: 25 2 | ytick.labelsize: 25 3 | font.size: 40 4 | figure.autolayout: False 5 | figure.figsize: 7.2,4.45 6 | axes.titlesize : 50 7 | axes.labelsize : 40 8 | lines.linewidth : 2 9 | lines.markersize : 6 10 | legend.fontsize: 25 11 | mathtext.fontset: stix 12 | font.family: STIXGeneral 13 | pdf.fonttype : 42 14 | ps.fonttype : 42 15 | axes.grid: False 16 | axes.edgecolor: .15 17 | axes.linewidth: 1.25 -------------------------------------------------------------------------------- /SMPyBandits/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2018 Lilian Besson (Naereen), https://GitHub.com/Naereen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/.gitignore: -------------------------------------------------------------------------------- 1 | # automatically generated with cython for kullback_cython.pyx 2 | kullback.c 3 | kullback_cython.c 4 | build/ 5 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/BayesUCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Bayes-UCB policy. 3 | 4 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 5 | - Reference: [Kaufmann, Cappé & Garivier - AISTATS, 2012] 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Olivier Cappé, Aurélien Garivier, Emilie Kaufmann, Lilian Besson" 10 | __version__ = "0.5" 11 | 12 | try: 13 | from .BayesianIndexPolicy import BayesianIndexPolicy 14 | except ImportError: 15 | from BayesianIndexPolicy import BayesianIndexPolicy 16 | 17 | 18 | class BayesUCB(BayesianIndexPolicy): 19 | """ The Bayes-UCB policy. 20 | 21 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 22 | -Reference: [Kaufmann, Cappé & Garivier - AISTATS, 2012]. 
23 | """ 24 | 25 | def computeIndex(self, arm): 26 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, giving :math:`S_k(t)` rewards of 1, by taking the :math:`1 - \frac{1}{t}` quantile from the Beta posterior: 27 | 28 | .. math:: I_k(t) = \mathrm{Quantile}\left(\mathrm{Beta}(1 + S_k(t), 1 + N_k(t) - S_k(t)), 1 - \frac{1}{t}\right). 29 | """ 30 | return self.posterior[arm].quantile(1. - 1. / (1 + self.t)) 31 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/BayesianIndexPolicy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Basic Bayesian index policy. By default, it uses a Beta posterior. """ 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.9" 7 | 8 | try: 9 | from .IndexPolicy import IndexPolicy 10 | from .Posterior import Beta 11 | except ImportError: 12 | from IndexPolicy import IndexPolicy 13 | from Posterior import Beta 14 | 15 | 16 | class BayesianIndexPolicy(IndexPolicy): 17 | """ Basic Bayesian index policy. 18 | 19 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 20 | - Use ``*args`` and ``**kwargs`` if you want to give parameters to the underlying posteriors. 21 | - Or use ``params_for_each_posterior`` as a *list* of parameters (as a dictionary) to give a different set of parameters for each posterior. 22 | """ 23 | 24 | def __init__(self, nbArms, 25 | posterior=Beta, 26 | lower=0., amplitude=1., 27 | *args, **kwargs 28 | ): 29 | """ Create a new Bayesian policy, by creating a default posterior on each arm.""" 30 | super(BayesianIndexPolicy, self).__init__(nbArms, lower=lower, amplitude=amplitude) 31 | self.posterior = [None] * nbArms #: Posterior for each arm. 
List instead of dict, quicker access 32 | if 'params_for_each_posterior' in kwargs: 33 | params = kwargs['params_for_each_posterior'] 34 | print("'params_for_each_posterior' is in kwargs, so using params =\n{}\nas a list of parameters to give to each posterior.".format(params)) # DEBUG 35 | for arm in range(self.nbArms): 36 | print("Creating posterior for arm {}, with params = {}.".format(arm, params[arm])) # DEBUG 37 | self.posterior[arm] = posterior(**params[arm]) 38 | else: 39 | for arm in range(self.nbArms): 40 | # print("Creating posterior for arm {}, with args = {} and kwargs = {}.".format(arm, args, kwargs)) # DEBUG 41 | self.posterior[arm] = posterior(*args, **kwargs) 42 | self._posterior_name = str(self.posterior[0].__class__.__name__) 43 | 44 | def __str__(self): 45 | """ -> str""" 46 | if self._posterior_name == "Beta": 47 | return "{}".format(self.__class__.__name__) 48 | else: 49 | return "{}({})".format(self.__class__.__name__, self._posterior_name) 50 | 51 | def startGame(self): 52 | """ Reset the posterior on each arm.""" 53 | self.t = 0 54 | for arm in range(self.nbArms): 55 | self.posterior[arm].reset() 56 | # print("Policy {} reinitialized with posteriors: {}".format(self, [str(p) for p in self.posterior])) # DEBUG 57 | 58 | def getReward(self, arm, reward): 59 | """ Update the posterior on each arm, with the normalized reward.""" 60 | self.posterior[arm].update((reward - self.lower) / self.amplitude) 61 | self.t += 1 62 | 63 | def computeIndex(self, arm): 64 | raise NotImplementedError("This method computeIndex(arm) has to be implemented in the child class inheriting from BayesianIndexPolicy.") 65 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/BoltzmannGumbel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Boltzmann-Gumbel Exploration (BGE) index policy, a different formulation of the :class:`Exp3` policy with an optimally tune decreasing sequence of temperature parameters :math:`\gamma_t`. 3 | 4 | - Reference: Section 4 of [Boltzmann Exploration Done Right, N.Cesa-Bianchi & C.Gentile & G.Lugosi & G.Neu, arXiv 2017](https://arxiv.org/pdf/1705.10257.pdf). 5 | - It is an index policy with indexes computed from the empirical mean estimators and a random sample from a Gumbel distribution. 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.6" 11 | 12 | import numpy as np 13 | import numpy.random as rn 14 | 15 | try: 16 | from .IndexPolicy import IndexPolicy 17 | except ImportError: 18 | from IndexPolicy import IndexPolicy 19 | 20 | 21 | #: Default constant :math:`\sigma` assuming the arm distributions are :math:`\sigma^2`-subgaussian. 1 for Bernoulli arms. 22 | SIGMA = 1 23 | 24 | class BoltzmannGumbel(IndexPolicy): 25 | r""" The Boltzmann-Gumbel Exploration (BGE) index policy, a different formulation of the :class:`Exp3` policy with an optimally tune decreasing sequence of temperature parameters :math:`\gamma_t`. 26 | 27 | - Reference: Section 4 of [Boltzmann Exploration Done Right, N.Cesa-Bianchi & C.Gentile & G.Lugosi & G.Neu, arXiv 2017](https://arxiv.org/pdf/1705.10257.pdf). 28 | - It is an index policy with indexes computed from the empirical mean estimators and a random sample from a Gumbel distribution. 
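    For instance (an illustrative note, not in the original docstring): with :math:`C = 1` and :math:`N_k(t) = 25` pulls, the exploration scale is :math:`\beta_k(t) = \sqrt{1/25} = 0.2`, so the index is the empirical mean of arm k plus :math:`0.2` times a standard Gumbel draw.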
29 | """ 30 | 31 | def __init__(self, nbArms, C=SIGMA, lower=0., amplitude=1.): 32 | super(BoltzmannGumbel, self).__init__(nbArms, lower=lower, amplitude=amplitude) 33 | assert C > 0, "Error: the C parameter for BoltzmannGumbel class has to be > 0." 34 | self.C = C 35 | 36 | def __str__(self): 37 | return r"BoltzmannGumbel($\alpha={:.3g}$)".format(self.C) 38 | 39 | def computeIndex(self, arm): 40 | r""" Take a random index, at time t and after :math:`N_k(t)` pulls of arm k: 41 | 42 | .. math:: 43 | 44 | I_k(t) &= \frac{X_k(t)}{N_k(t)} + \beta_k(t) Z_k(t), \\ 45 | \text{where}\;\; \beta_k(t) &:= \sqrt{C^2 / N_k(t)}, \\ 46 | \text{and}\;\; Z_k(t) &\sim \mathrm{Gumbel}(0, 1). 47 | 48 | Where :math:`\mathrm{Gumbel}(0, 1)` is the standard Gumbel distribution. 49 | See [Numpy documentation](https://docs.scipy.org/doc/numpy/reference/generated/numpy.random.gumbel.html#numpy.random.gumbel) or [Wikipedia page](https://en.wikipedia.org/wiki/Gumbel_distribution) for more details. 50 | """ 51 | if self.pulls[arm] < 1: 52 | return float('+inf') 53 | else: 54 | beta_k_t = np.sqrt(self.C ** 2 / self.pulls[arm]) 55 | z_k_t = rn.gumbel(0, 1) 56 | return (self.rewards[arm] / self.pulls[arm]) + beta_k_t * z_k_t 57 | 58 | def computeAllIndex(self): 59 | """ Compute the current indexes for all arms, in a vectorized manner.""" 60 | beta_t = np.sqrt(self.C ** 2 / self.pulls) 61 | z_t = rn.gumbel(0, 1, self.nbArms) # vector samples 62 | indexes = (self.rewards / self.pulls) + beta_t * z_t 63 | indexes[self.pulls < 1] = float('+inf') 64 | self.index[:] = indexes 65 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | *.so 3 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/Makefile: -------------------------------------------------------------------------------- 1 | # GNU Make makefile to build the kullback C extension 2 | all: clean build install clean 3 | 4 | build: build2 build3 5 | 6 | build2: kullback.c setup.py 7 | python2 setup.py build 8 | 9 | build3: kullback_py3.c setup.py3 10 | python3 setup.py3 build 11 | 12 | install: 13 | \cp build/lib*/kullback.* ../ 14 | 15 | clean: setup.py 16 | python2 setup.py clean 17 | python3 setup.py3 clean 18 | #rm -rvf build/* 19 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/README.md: -------------------------------------------------------------------------------- 1 | # Fast C versions of the utilities in [`kullpack.py`](../kullback.py) 2 | 3 | ## Prefer the Cython version? 4 | WARNING: I have now written a Cython version of this module, see [`kullback_cython.pyx`](../kullback_cython.pyx). 5 | It has all the advantages of the C version (speed and memory efficiency), and all the advantages of the Python version (documentation, optional arguments). 6 | 7 | You can have a look to the first examples in [`kullback_cython.pyx`](../kullback_cython.pyx) to see a small comparison between the Cython and C versions. 8 | 9 | TL;DR: I don't recommend that you try using this C version, it's not worth it: the C version is only 2 times faster than the Cython one, and both are between 100 to 200 times faster than the naive Python versions! 10 | 11 | ### Requirements? 
12 | You need either the `cython` package for your version of Python (if you want to compile the [`kullback_cython.pyx`](../kullback_cython.pyx) file before running your extension), or the `cython` package together with its bundled `pyximport` module, if you want to be able to directly import the Cython version with: 13 | 14 | ```python 15 | >>> import pyximport; pyximport.install() 16 | >>> import kullback_cython as kullback 17 | >>> # then use kullback.klucbBern or others, as if they came from the pure Python version! 18 | ``` 19 | 20 | --- 21 | 22 | ## Build it 23 | To create the module use 24 | 25 | ```bash 26 | python setup.py build 27 | python3 setup.py build 28 | ``` 29 | 30 | Or simply use the provided [`Makefile`](Makefile): 31 | 32 | ```bash 33 | make build 34 | ``` 35 | 36 | The compiled module (`.so` file) will appear in `build/lib.???` (typically `yoursys-yourarch-yourversion`). 37 | 38 | ## Clean-up 39 | Temporary files in `build/temp.*` can be removed with 40 | 41 | ```bash 42 | python setup.py clean 43 | python3 setup.py clean 44 | ``` 45 | 46 | Or simply use the provided [`Makefile`](Makefile): 47 | 48 | ```bash 49 | make clean 50 | ``` 51 | 52 | ## Requirements 53 | Building requires the header files and static library, typically available in a package called `python-dev` (on Linux systems). 54 | See [the Python documentation](https://docs.python.org/3/c-api/) for more details. 55 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Utility for building the C library for Python 2.""" 3 | 4 | __author__ = "Olivier Cappé, Aurélien Garivier" 5 | __version__ = "$Revision: 1.3 $" 6 | 7 | from distutils.core import setup, Extension 8 | 9 | module1 = Extension('kullback', sources=['kullback.c']) 10 | 11 | 12 | setup(name='Kullback utilities', 13 | version='1.0', 14 | description='computes various KL divergences', 15 | ext_modules=[module1] 16 | ) 17 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/C/setup.py3: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Utility for building the C library for Python 3.""" 3 | 4 | __author__ = "Olivier Cappé, Aurélien Garivier" 5 | __version__ = "$Revision: 1.3 $" 6 | 7 | from distutils.core import setup, Extension 8 | 9 | module1 = Extension('kullback', sources=['kullback_py3.c']) 10 | 11 | 12 | setup(name='Kullback utilities', 13 | version='1.0', 14 | description='computes various KL divergences', 15 | ext_modules=[module1] 16 | ) 17 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/DiscountedBayesianIndexPolicy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Discounted Bayesian index policy. 3 | 4 | - By default, it uses a DiscountedBeta posterior (:class:`Policies.Posterior.DiscountedBeta`), one by arm. 5 | - Use discount factor :math:`\gamma\in(0,1)`. 6 | 7 | .. warning:: This is still highly experimental!
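    Illustration (added note, not in the original docstring): with :math:`\gamma = 0.95`, if arm :math:`A(t)` is pulled and yields reward :math:`r(t) = 1`, its discounted success count becomes :math:`\widetilde{S_{A(t)}}(t+1) = 0.95 \widetilde{S_{A(t)}}(t) + 1`, while the discounted counts of every other arm are simply multiplied by :math:`0.95` (see the update equations in the class docstring below).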
8 | """ 9 | from __future__ import division, print_function # Python 2 compatibility 10 | 11 | __author__ = "Lilian Besson" 12 | __version__ = "0.9" 13 | 14 | try: 15 | from .BayesianIndexPolicy import BayesianIndexPolicy 16 | from .Posterior import DiscountedBeta 17 | except ImportError: 18 | from BayesianIndexPolicy import BayesianIndexPolicy 19 | from Posterior import DiscountedBeta 20 | 21 | 22 | # --- Constants 23 | 24 | #: Default value for the discount factor :math:`\gamma\in(0,1)`. 25 | #: ``0.95`` is empirically a reasonable value for short-term non-stationary experiments. 26 | GAMMA = 0.95 27 | 28 | 29 | # --- Class 30 | 31 | class DiscountedBayesianIndexPolicy(BayesianIndexPolicy): 32 | r""" Discounted Bayesian index policy. 33 | 34 | - By default, it uses a DiscountedBeta posterior (:class:`Policies.Posterior.DiscountedBeta`), one by arm. 35 | - Use discount factor :math:`\gamma\in(0,1)`. 36 | 37 | - It keeps :math:`\widetilde{S_k}(t)` and :math:`\widetilde{F_k}(t)` the discounted counts of successes and failures (S and F), for each arm k. 38 | 39 | - But instead of using :math:`\widetilde{S_k}(t) = S_k(t)` and :math:`\widetilde{N_k}(t) = N_k(t)`, they are updated at each time step using the discount factor :math:`\gamma`: 40 | 41 | .. math:: 42 | 43 | \widetilde{S_{A(t)}}(t+1) &= \gamma \widetilde{S_{A(t)}}(t) + r(t),\\ 44 | \widetilde{S_{k'}}(t+1) &= \gamma \widetilde{S_{k'}}(t), \forall k' \neq A(t). 45 | 46 | .. math:: 47 | 48 | \widetilde{F_{A(t)}}(t+1) &= \gamma \widetilde{F_{A(t)}}(t) + (1 - r(t)),\\ 49 | \widetilde{F_{k'}}(t+1) &= \gamma \widetilde{F_{k'}}(t), \forall k' \neq A(t). 50 | """ 51 | 52 | def __init__(self, nbArms, 53 | gamma=GAMMA, posterior=DiscountedBeta, 54 | lower=0., amplitude=1., 55 | *args, **kwargs 56 | ): 57 | """ Create a new Bayesian policy, by creating a default posterior on each arm.""" 58 | super(DiscountedBayesianIndexPolicy, self).__init__(nbArms, posterior=posterior, lower=lower, amplitude=amplitude, gamma=gamma) 59 | assert 0 < gamma <= 1, "Error: for a DiscountedBayesianIndexPolicy policy, the discount factor has to be in [0,1], but it was {}.".format(gamma) # DEBUG 60 | if gamma == 1: 61 | print("Warning: gamma = 1 is stupid, just use a regular Beta posterior!") # DEBUG 62 | self.gamma = gamma #: Discount factor :math:`\gamma\in(0,1)`. 63 | 64 | def __str__(self): 65 | """ -> str""" 66 | return r"{}($\gamma={:.5g}${})".format(self.__class__.__name__, self.gamma, self._posterior_name if self._posterior_name != "DiscountedBeta" else "") 67 | 68 | def getReward(self, arm, reward): 69 | """ Update the posterior on each arm, with the normalized reward.""" 70 | self.posterior[arm].update((reward - self.lower) / self.amplitude) 71 | # DONE we should update the other posterior with "no observation" 72 | for otherArm in range(self.nbArms): 73 | if otherArm != arm: 74 | self.posterior[arm].discount() 75 | self.t += 1 -------------------------------------------------------------------------------- /SMPyBandits/Policies/DiscountedThompson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Discounted Thompson (Bayesian) index policy. 3 | 4 | - By default, it uses a DiscountedBeta posterior (:class:`Policies.Posterior.DiscountedBeta`), one by arm. 5 | - Reference: [["Taming Non-stationary Bandits: A Bayesian Approach", Vishnu Raj & Sheetal Kalyani, arXiv:1707.09727](https://arxiv.org/abs/1707.09727)]. 6 | 7 | .. warning:: This is still highly experimental! 
8 | """ 9 | from __future__ import division, print_function # Python 2 compatibility 10 | 11 | __author__ = "Lilian Besson" 12 | __version__ = "0.9" 13 | 14 | try: 15 | from .DiscountedBayesianIndexPolicy import DiscountedBayesianIndexPolicy 16 | except (ImportError, SystemError): 17 | from DiscountedBayesianIndexPolicy import DiscountedBayesianIndexPolicy 18 | 19 | 20 | class DiscountedThompson(DiscountedBayesianIndexPolicy): 21 | """The DiscountedThompson (Bayesian) index policy. 22 | 23 | - By default, it uses a DiscountedBeta posterior (:class:`Policies.Posterior.DiscountedBeta`), one by arm. 24 | - Reference: [["Taming Non-stationary Bandits: A Bayesian Approach", Vishnu Raj & Sheetal Kalyani, arXiv:1707.09727](https://arxiv.org/abs/1707.09727)]. 25 | """ 26 | 27 | def computeIndex(self, arm): 28 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, by sampling from the DiscountedBeta posterior. 29 | 30 | .. math:: 31 | A(t) &\sim U(\arg\max_{1 \leq k \leq K} I_k(t)),\\ 32 | I_k(t) &\sim \mathrm{Beta}(1 + \widetilde{S_k}(t), 1 + \widetilde{F_k}(t)). 33 | 34 | - It keeps :math:`\widetilde{S_k}(t)` and :math:`\widetilde{F_k}(t)` the discounted counts of successes and failures (S and F), for each arm k. 35 | 36 | - But instead of using :math:`\widetilde{S_k}(t) = S_k(t)` and :math:`\widetilde{N_k}(t) = N_k(t)`, they are updated at each time step using the discount factor :math:`\gamma`: 37 | 38 | .. math:: 39 | 40 | \widetilde{S_{A(t)}}(t+1) &= \gamma \widetilde{S_{A(t)}}(t) + r(t),\\ 41 | \widetilde{S_{k'}}(t+1) &= \gamma \widetilde{S_{k'}}(t), \forall k' \neq A(t). 42 | 43 | .. math:: 44 | 45 | \widetilde{F_{A(t)}}(t+1) &= \gamma \widetilde{F_{A(t)}}(t) + (1 - r(t)),\\ 46 | \widetilde{F_{k'}}(t+1) &= \gamma \widetilde{F_{k'}}(t), \forall k' \neq A(t). 47 | """ 48 | return self.posterior[arm].sample() 49 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/EmpiricalMeans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The naive Empirical Means policy for bounded bandits: like UCB but without a bias correction term. Note that it is equal to UCBalpha with alpha=0, only quicker.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.1" 7 | 8 | import numpy as np 9 | np.seterr(divide='ignore', invalid='ignore') # XXX dangerous in general, controlled here! 10 | 11 | try: 12 | from .IndexPolicy import IndexPolicy 13 | except ImportError: 14 | from IndexPolicy import IndexPolicy 15 | 16 | 17 | class EmpiricalMeans(IndexPolicy): 18 | """ The naive Empirical Means policy for bounded bandits: like UCB but without a bias correction term. Note that it is equal to UCBalpha with alpha=0, only quicker.""" 19 | 20 | def computeIndex(self, arm): 21 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 22 | 23 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)}. 
24 | """ 25 | if self.pulls[arm] < 1: 26 | return float('+inf') 27 | else: 28 | return self.rewards[arm] / self.pulls[arm] 29 | 30 | def computeAllIndex(self): 31 | """ Compute the current indexes for all arms, in a vectorized manner.""" 32 | indexes = self.rewards / self.pulls 33 | indexes[self.pulls < 1] = float('+inf') 34 | self.index[:] = indexes 35 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/.gitignore: -------------------------------------------------------------------------------- 1 | # automatically generated with cython for kullback_cython.pyx 2 | kullback.c 3 | kullback_cython.c 4 | build/ 5 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/KLempUCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Empirical KL-UCB algorithm non-parametric policy. 3 | Reference: [Maillard, Munos & Stoltz - COLT, 2011], [Cappé, Garivier, Maillard, Munos & Stoltz, 2012]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Olivier Cappé, Aurélien Garivier, Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 11 | from sys import path 12 | from os.path import dirname 13 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 14 | import numpy as np 15 | 16 | try: 17 | from .kullback import maxEV # XXX Not detected as in the kullback.py file ? 18 | from .IndexPolicy import IndexPolicy 19 | except ImportError: 20 | from kullback import maxEV # XXX Not detected as in the kullback.py file ? 21 | from IndexPolicy import IndexPolicy 22 | 23 | 24 | class KLempUCB(IndexPolicy): 25 | """ The Empirical KL-UCB algorithm non-parametric policy. 26 | References: [Maillard, Munos & Stoltz - COLT, 2011], [Cappé, Garivier, Maillard, Munos & Stoltz, 2012]. 27 | """ 28 | 29 | def __init__(self, nbArms, maxReward=1., lower=0., amplitude=1.): 30 | super(KLempUCB, self).__init__(nbArms, lower=lower, amplitude=amplitude) 31 | self.c = 1 #: Parameter c 32 | self.maxReward = maxReward #: Known upper bound on the rewards 33 | self.pulls = np.zeros(self.nbArms, dtype=int) #: Keep track of pulls of each arm 34 | #: UNBOUNDED dictionnary for each arm: keep track of how many observation of each rewards were seen. 35 | #: Warning: KLempUCB works better for *discrete* distributions! 36 | self.obs = [dict()] * self.nbArms 37 | 38 | def startGame(self): 39 | """ Initialize the policy for a new game.""" 40 | self.t = 0 41 | self.pulls.fill(0) 42 | for arm in range(self.nbArms): 43 | self.obs[arm] = {self.maxReward: 0} 44 | 45 | def computeIndex(self, arm): 46 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k.""" 47 | if self.pulls[arm] < 1: 48 | return float('+infinity') 49 | else: 50 | return self._KLucb(self.obs[arm], self.c * np.log(self.t) / self.pulls[arm]) 51 | 52 | def getReward(self, arm, reward): 53 | """ Give a reward: increase t, pulls, and update count of observations for that arm.""" 54 | self.t += 1 55 | self.pulls[arm] += 1 56 | self.obs[arm][reward] = 1 + self.obs[arm].get(reward, 0) 57 | 58 | # FIXME this does not work apparently... 
59 | @staticmethod 60 | def _KLucb(obs, klMax, debug=False): 61 | """ Optimization method.""" 62 | p = np.array(list(obs.values()), dtype=float) 63 | p /= np.sum(p) 64 | v = np.array(list(obs.keys()), dtype=float) 65 | if debug: 66 | print("Calling maxEV(", p, ", ", v, ", ", klMax, ") ...") 67 | q = maxEV(p, v, klMax) 68 | # if debug: 69 | # q2 = kbp.maxEV(p, v, klMax) 70 | # if max(abs(q - q2)) > 1e-8: 71 | # print("ERROR: for p=", p, " ,v = ", v, " and klMax = ", klMax, " : ") 72 | # print("q = ", q) 73 | # print("q2 = ", q2) 74 | # print("_____________________________") 75 | # print("q = ", q) 76 | return np.dot(q, v) 77 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/Makefile: -------------------------------------------------------------------------------- 1 | # Basic Makefile to compile a Cython extension. 2 | # It is used to compile the cython_extensions extension, by running 'make cython_extensions' 3 | 4 | cython_extensions3: cython_extensions 5 | cython_extensions: 6 | python3 setup.py build_ext --inplace 7 | -cp -vf SMPyBandits/Policies/Experimentals/*.so ./ 8 | -chmod -x ./*.so 9 | -chmod g-w ./*.so 10 | -chmod o-w ./*.so 11 | -ls -larth ./*.so 12 | -rm -vfr ./build ./*.c 13 | # -mv -vf ./SMPyBandits /tmp/ 14 | 15 | cython_extensions2: 16 | python2 setup.py build_ext --inplace 17 | -cp -vf SMPyBandits/Policies/Experimentals/*.so ./ 18 | -chmod -x ./*.so 19 | -chmod g-w ./*.so 20 | -chmod o-w ./*.so 21 | -ls -larth ./*.so 22 | -rm -vfr ./build ./*.c 23 | # -mv -vf ./SMPyBandits /tmp/ 24 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/README.md: -------------------------------------------------------------------------------- 1 | # [Single-Player policies](https://smpybandits.github.io/docs/Policies.Experimentals.html) 2 | > See here the documentation: [docs/Policies.Experimentals](https://smpybandits.github.io/docs/Policies.Experimentals.html) 3 | 4 | ## List of experimental policies 5 | ``Policies.Experimentals.Experimentals`` module : contains experimental or unfinished (single-player) bandits algorithms: 6 | 7 | - Index based UCB algorithms: [`UCBlog10`](UCBlog10.py), [`UCBwrong`](UCBwrong.py), [`UCBlog10alpha`](UCBlog10alpha.py), [`UCBcython`](UCBcython.py), [`UCBjulia`](UCBjulia.py) (with [`UCBjulia.jl`](UCBjulia.jl)), 8 | 9 | - Based on Kullback-Leibler divergence: [`klUCBlog10`](klUCBlog10.py), [`klUCBloglog10`](klUCBloglog10.py), 10 | 11 | - Empirical KL-UCB algorithm: [`KLempUCB`](KLempUCB.py) (does not work with the C optimized version of [`kullback`](kullback.py), 12 | 13 | - An *experimental* policy, using Unsupervised Learning: [`UnsupervisedLearning`](UnsupervisedLearning.py), 14 | 15 | - An *experimental* policy, using Black-box optimization: [`BlackBoxOpt`](BlackBoxOpt.py), 16 | 17 | - Bayesian algorithms: [`ThompsonRobust`](ThompsonRobust.py), 18 | 19 | - **New!** The UCBoost (Upper Confidence bounds with Boosting) policies, first with no boosting, in module [`UCBoost_faster`](UCBoost_faster.py): `UCBoost_faster.UCB_sq`, `UCBoost_faster.UCB_bq`, `UCBoost_faster.UCB_h`, `UCBoost_faster.UCB_lb`, `UCBoost_faster.UCB_t`, and then the ones with non-adaptive boosting: `UCBoost_faster.UCBoost_bq_h_lb`, `UCBoost_faster.UCBoost_bq_h_lb_t`, `UCBoost_faster.UCBoost_bq_h_lb_t_sq`, `UCBoost_faster.UCBoost`, and finally the epsilon-approximation boosting with `UCBoost_faster.UCBoostEpsilon`. These versions use Cython for some functions. 
20 | 21 | - **New!** The UCBoost (Upper Confidence bounds with Boosting) policies, first with no boosting, in module [`UCBoost_cython`](UCBoost_cython.py): `UCBoost_cython.UCB_sq`, `UCBoost_cython.UCB_bq`, `UCBoost_cython.UCB_h`, `UCBoost_cython.UCB_lb`, `UCBoost_cython.UCB_t`, and then the ones with non-adaptive boosting: `UCBoost_cython.UCBoost_bq_h_lb`, `UCBoost_cython.UCBoost_bq_h_lb_t`, `UCBoost_cython.UCBoost_bq_h_lb_t_sq`, `UCBoost_cython.UCBoost`, and finally the epsilon-approximation boosting with `UCBoost_cython.UCBoostEpsilon`. These versions use Cython for the whole code. 22 | 23 | 24 | ## API 25 | All policies have the same interface, as described in [`BasePolicy`](../BasePolicy.py), 26 | in order to use them in any experiment with the following approach: 27 | 28 | ```python 29 | my_policy = Policy(nbArms) 30 | my_policy.startGame() # start the game 31 | for t in range(T): 32 | chosen_arm_t = k_t = my_policy.choice() # choose one arm 33 | reward_t = arms[k_t].draw() # sample a reward from the chosen arm (arms = your list of Arm objects) 34 | my_policy.getReward(k_t, reward_t) # give it to the policy 35 | ``` 36 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/ThompsonRobust.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """The Thompson (Bayesian) index policy, where each index is the average of several samples from the posterior (10 by default). By default, it uses a Beta posterior. 3 | Reference: [Thompson - Biometrika, 1933]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.6" 9 | 10 | import numpy as np 11 | 12 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 13 | from sys import path 14 | from os.path import dirname 15 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 16 | try: 17 | from .Thompson import Thompson 18 | from .Posterior import Beta 19 | except ImportError: 20 | from Thompson import Thompson 21 | from Posterior import Beta 22 | 23 | 24 | #: Default value of how many indexes are computed by sampling the posterior 25 | #: for the ThompsonRobust variant. 26 | AVERAGEON = 10 27 | 28 | 29 | class ThompsonRobust(Thompson): 30 | """The Thompson (Bayesian) index policy, where each index is the average of several samples from the posterior (``averageOn``, 10 by default). By default, it uses a Beta posterior. 31 | Reference: [Thompson - Biometrika, 1933]. 32 | """ 33 | 34 | def __init__(self, nbArms, posterior=Beta, averageOn=AVERAGEON, lower=0., amplitude=1.): 35 | super(ThompsonRobust, self).__init__(nbArms, posterior=posterior, lower=lower, amplitude=amplitude) 36 | assert averageOn >= 1, "Error: invalid value for 'averageOn' parameter for ThompsonRobust, should be >= 1." # DEBUG 37 | self.averageOn = averageOn #: How many indexes are computed before averaging 38 | 39 | def __str__(self): 40 | return "%s(averageOn = %i)" % (self.__class__.__name__, self.averageOn) 41 | 42 | def computeIndex(self, arm): 43 | r""" Compute the current index for this arm, by sampling the posterior ``averageOn`` times and returning the average of the sampled indexes. 44 | 45 | At time t and after :math:`N_k(t)` pulls of arm k, giving :math:`S_k(t)` rewards of 1, by sampling from the Beta posterior and averaging: 46 | 47 | .. math:: 48 | 49 | I_k(t) &= \frac{1}{\mathrm{averageOn}} \sum_{i=1}^{\mathrm{averageOn}} I_k^{(i)}(t), \\ 50 | I_k^{(i)}(t) &\sim \mathrm{Beta}(1 + S_k(t), 1 + N_k(t) - S_k(t)).
51 | """ 52 | return np.mean([self.posterior[arm].sample() for _ in range(self.averageOn)]) 53 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBcython.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB1 (UCB-alpha) index policy, using a Cython extension. 3 | 4 | - Reference: [Auer et al. 02]. 5 | 6 | .. warning:: 7 | 8 | This extension should be used with the ``setup.py`` script, by running:: 9 | 10 | $ python setup.py build_ext --inplace 11 | 12 | You can also use [pyximport](http://docs.cython.org/en/latest/src/tutorial/cython_tutorial.html#pyximport-cython-compilation-for-developers) to import the ``kullback_cython`` module transparently: 13 | 14 | >>> import pyximport; pyximport.install() # instantaneous # doctest: +ELLIPSIS 15 | (None, ) 16 | >>> from UCBcython import * # takes about two seconds 17 | """ 18 | from __future__ import division, print_function # Python 2 compatibility 19 | 20 | __author__ = "Lilian Besson" 21 | __version__ = "0.9" 22 | 23 | from libc.math cimport log, sqrt, exp, ceil, floor 24 | 25 | import numpy as np 26 | # cimport numpy as np # WARNING might be deprecated 27 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 28 | from sys import path; path.insert(0, '..') 29 | 30 | try: 31 | # from IndexPolicy import IndexPolicy 32 | import IndexPolicy as INDEXPOLICY 33 | IndexPolicy = INDEXPOLICY.IndexPolicy 34 | except ImportError: 35 | from .IndexPolicy import IndexPolicy 36 | 37 | try: 38 | import UCB as UCBMODULE 39 | UCB = UCBMODULE.UCB 40 | except ImportError: 41 | from .UCB import UCB 42 | 43 | #: Default parameter for alpha 44 | cdef float ALPHA 45 | ALPHA = 1 46 | ALPHA = 4 47 | 48 | 49 | cdef float UCBindex(float reward, float pull, float t, int arm, float alpha=ALPHA): 50 | if pull < 1: 51 | return float('+inf') 52 | else: 53 | return (reward / pull) + sqrt((alpha * log(t)) / (2 * pull)) 54 | 55 | 56 | class UCBcython(UCB): 57 | """ The UCB1 (UCB-alpha) index policy, using a Cython extension. 58 | 59 | - Reference: [Auer et al. 02]. 60 | """ 61 | 62 | def __init__(self, nbArms, alpha=ALPHA, lower=0., amplitude=1.): 63 | super(UCBcython, self).__init__(nbArms, lower=lower, amplitude=amplitude) 64 | assert alpha >= 0, "Error: the alpha parameter for UCBcython class has to be >= 0." # DEBUG 65 | self.alpha = alpha #: Parameter alpha 66 | 67 | def __str__(self): 68 | return r"UCBcython($\alpha={:.3g}$)".format(self.alpha) 69 | 70 | def computeIndex(self, arm): 71 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 72 | 73 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{\alpha \log(t)}{2 N_k(t)}}. 
74 | """ 75 | return UCBindex(self.rewards[arm], self.pulls[arm], self.t, self.alpha) 76 | # if self.pulls[arm] < 1: 77 | # return float('+inf') 78 | # else: 79 | # return (self.rewards[arm] / self.pulls[arm]) + sqrt((self.alpha * log(self.t)) / (2 * self.pulls[arm])) 80 | 81 | def computeAllIndex(self): 82 | """ Compute the current indexes for all arms, in a vectorized manner.""" 83 | for arm in range(self.nbArms): 84 | self.index[arm] = self.computeIndex(arm) 85 | # indexes = (self.rewards / self.pulls) + np.sqrt((self.alpha * np.log(self.t)) / (2 * self.pulls)) 86 | # indexes[self.pulls < 1] = float('+inf') 87 | # self.index[:] = indexes 88 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBjulia.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | """ 3 | A small Julia module that defines a simple function, to be used in UCBjulia.py (with pyjulia). 4 | """ 5 | 6 | # Small Julia module to wrap the function that computes a UCB index 7 | module UCBjulia 8 | function index(rewards, pulls, t, arm, alpha=4) 9 | if pulls[arm] < 1 10 | return Inf 11 | else 12 | return (rewards[arm] / pulls[arm]) + sqrt((alpha * log(t)) / (2 * pulls[arm])) 13 | end 14 | end 15 | end 16 | 17 | # Small Julia function that computes a UCB index 18 | function index(rewards, pulls, t, arm, alpha=4) 19 | if pulls[arm] < 1 20 | return Inf 21 | else 22 | return (rewards[arm] / pulls[arm]) + sqrt((alpha * log(t)) / (2 * pulls[arm])) 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBjulia.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB policy for bounded bandits, with UCB indexes computed with Julia. 3 | Reference: [Lai & Robbins, 1985]. 4 | 5 | .. warning:: 6 | 7 | Using a Julia function *from* Python will not speed up anything, as there is a lot of overhead in the "bridge" protocol used by pyjulia. 8 | The idea of using naively a tiny Julia function to speed up computations is basically useless. 9 | 10 | A naive benchmark showed that in this approach, :class:`UCBjulia` (used withing Python) is about 125 times slower (!) than :class:`UCB`. 11 | 12 | .. warning:: This is only experimental, and purely useless. See https://github.com/SMPyBandits/SMPyBandits/issues/98 13 | """ 14 | from __future__ import division, print_function # Python 2 compatibility 15 | 16 | __author__ = "Lilian Besson" 17 | __version__ = "0.9" 18 | 19 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 20 | from sys import path 21 | from os.path import dirname 22 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 23 | try: 24 | from .IndexPolicy import IndexPolicy 25 | except ImportError: 26 | from IndexPolicy import IndexPolicy 27 | 28 | 29 | class UCBjulia(IndexPolicy): 30 | """ The UCB policy for bounded bandits, with UCB indexes computed with Julia. 31 | Reference: [Lai & Robbins, 1985]. 32 | 33 | .. warning:: This is only experimental, and purely useless. 
See https://github.com/SMPyBandits/SMPyBandits/issues/98 34 | """ 35 | 36 | def __init__(self, nbArms, lower=0., amplitude=1.): 37 | """ Will fail directly if the bridge with julia is unavailable or buggy.""" 38 | super(UCBjulia, self).__init__(nbArms, lower=lower, amplitude=amplitude) 39 | self.t = 0 40 | # Importing the julia module and creating the bridge 41 | try: 42 | import julia 43 | except ImportError as e: 44 | print("Error: unable to load the 'julia' Python module. Install with 'pip install julia', or see https://github.com/JuliaPy/pyjulia/") # DEBUG 45 | raise e 46 | _j = julia.Julia() 47 | try: 48 | self._index_function = _j.evalfile("Policies/UCBjulia.jl") 49 | except RuntimeError: 50 | try: 51 | self._index_function = _j.evalfile("UCBjulia.jl") 52 | except RuntimeError: 53 | raise ValueError("Error: Unable to load 'UCBjulia.jl' julia file.") # WARNING 54 | try: 55 | self._index_function([1], [1], 1, 1) 56 | except (RuntimeError, ValueError): 57 | raise ValueError("Error: the index function loaded from 'UCBjulia.jl' is bugged or unavailable.") # WARNING 58 | 59 | def computeIndex(self, arm): 60 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 61 | 62 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{2 \log(t)}{N_k(t)}}. 63 | """ 64 | # WARNING: the 'arm + 1' part comes from the difference between 0-based indexes 65 | # for Python and the 1-based indexes in Julia. The rest works pretty well! 66 | return self._index_function(self.rewards, self.pulls, self.t, arm + 1) 67 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBlog10.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCB policy for bounded bandits, using :math:`\log10(t)` and not :math:`\log(t)` for UCB index. 3 | Reference: [Lai & Robbins, 1985]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import sqrt, log10 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 15 | from sys import path 16 | from os.path import dirname 17 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 18 | 19 | try: 20 | from .IndexPolicy import IndexPolicy 21 | except ImportError: 22 | from IndexPolicy import IndexPolicy 23 | 24 | 25 | class UCBlog10(IndexPolicy): 26 | r""" The UCB policy for bounded bandits, using :math:`\log10(t)` and not :math:`\log(t)` for UCB index. 27 | Reference: [Lai & Robbins, 1985]. 28 | """ 29 | 30 | def computeIndex(self, arm): 31 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 32 | 33 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{2 \log_{10}(t)}{N_k(t)}}. 
34 | """ 35 | if self.pulls[arm] < 1: 36 | return float('+inf') 37 | else: 38 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((2 * log10(self.t)) / self.pulls[arm]) 39 | 40 | def computeAllIndex(self): 41 | """ Compute the current indexes for all arms, in a vectorized manner.""" 42 | indexes = (self.rewards / self.pulls) + np.sqrt((2 * np.log10(self.t)) / self.pulls) 43 | indexes[self.pulls < 1] = float('+inf') 44 | self.index[:] = indexes 45 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBlog10alpha.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCB1 (UCB-alpha) index policy, modified to take a random permutation order for the initial exploration of each arm (reduce collisions in the multi-players setting). 3 | Note: :math:`\log10(t)` and not :math:`\log(t)` for UCB index. 4 | Reference: [Auer et al. 02]. 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.2" 10 | 11 | from math import sqrt, log10 12 | import numpy as np 13 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 14 | 15 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 16 | from sys import path 17 | from os.path import dirname 18 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 19 | try: 20 | from .UCBlog10 import UCBlog10 21 | except ImportError: 22 | from UCBlog10 import UCBlog10 23 | 24 | #: Default parameter for alpha 25 | ALPHA = 4 26 | ALPHA = 1 27 | 28 | 29 | class UCBlog10alpha(UCBlog10): 30 | r""" The UCB1 (UCB-alpha) index policy, modified to take a random permutation order for the initial exploration of each arm (reduce collisions in the multi-players setting). 31 | Note: :math:`\log10(t)` and not :math:`\log(t)` for UCB index. 32 | Reference: [Auer et al. 02]. 33 | """ 34 | 35 | def __init__(self, nbArms, alpha=ALPHA, lower=0., amplitude=1.): 36 | super(UCBlog10alpha, self).__init__(nbArms, lower=lower, amplitude=amplitude) 37 | assert alpha >= 0, "Error: the alpha parameter for UCBalpha class has to be >= 0." # DEBUG 38 | self.alpha = alpha #: Parameter alpha 39 | 40 | def __str__(self): 41 | return r"UCB($\alpha={:.3g}$, {})".format(self.alpha, r"$\log_{10}$") 42 | 43 | def computeIndex(self, arm): 44 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 45 | 46 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{\alpha \log_{10}(t)}{2 N_k(t)}}. 47 | """ 48 | if self.pulls[arm] < 1: 49 | return float('+inf') 50 | else: 51 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((self.alpha * log10(self.t)) / (2 * self.pulls[arm])) 52 | 53 | def computeAllIndex(self): 54 | """ Compute the current indexes for all arms, in a vectorized manner.""" 55 | indexes = (self.rewards / self.pulls) + np.sqrt((self.alpha * np.log10(self.t)) / (2 * self.pulls)) 56 | indexes[self.pulls < 1] = float('+inf') 57 | self.index[:] = indexes 58 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/UCBwrong.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCBwrong policy for bounded bandits, like UCB but with a typo on the estimator of means: 3 | :math:`\frac{X_k(t)}{t}` is used instead of :math:`\frac{X_k(t)}{N_k(t)}`. 
4 | 5 | One paper of W.Jouini, C.Moy and J.Palicot from 2009 contained this typo, I reimplemented it just to check that: 6 | 7 | - its performance is worse than simple UCB, 8 | - but not that bad... 9 | """ 10 | from __future__ import division, print_function # Python 2 compatibility 11 | 12 | __author__ = "Lilian Besson" 13 | __version__ = "0.1" 14 | 15 | from math import sqrt, log 16 | import numpy as np 17 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 18 | 19 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 20 | from sys import path 21 | from os.path import dirname 22 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 23 | try: 24 | from .IndexPolicy import IndexPolicy 25 | except ImportError: 26 | from IndexPolicy import IndexPolicy 27 | 28 | 29 | class UCBwrong(IndexPolicy): 30 | """ The UCBwrong policy for bounded bandits, like UCB but with a typo on the estimator of means. 31 | 32 | One paper of W.Jouini, C.Moy and J.Palicot from 2009 contained this typo, I reimplemented it just to check that: 33 | 34 | - its performance is worse than simple UCB 35 | - but not that bad... 36 | """ 37 | 38 | def computeIndex(self, arm): 39 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 40 | 41 | .. math:: I_k(t) = \frac{X_k(t)}{t} + \sqrt{\frac{2 \log(t)}{N_k(t)}}. 42 | """ 43 | if self.pulls[arm] < 1: 44 | return float('+inf') 45 | else: 46 | # XXX Volontary typo, wrong mean estimate 47 | return (self.rewards[arm] / self.t) + sqrt((2 * log(self.t)) / self.pulls[arm]) 48 | 49 | def computeAllIndex(self): 50 | """ Compute the current indexes for all arms, in a vectorized manner.""" 51 | indexes = (self.rewards / self.t) + np.sqrt((2 * np.log(self.t)) / self.pulls) 52 | indexes[self.pulls < 1] = float('+inf') 53 | self.index[:] = indexes 54 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/klUCBlog10.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The generic kl-UCB policy for one-parameter exponential distributions. 3 | By default, it assumes Bernoulli arms. 4 | Note: using :math:`\log10(t)` and not :math:`\log(t)` for the KL-UCB index. 5 | Reference: [Garivier & Cappé - COLT, 2011]. 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.5" 11 | 12 | from math import log10 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 17 | from sys import path 18 | from os.path import dirname 19 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 20 | try: 21 | from .klUCB import klUCB 22 | except ImportError: 23 | from klUCB import klUCB 24 | 25 | 26 | class klUCBlog10(klUCB): 27 | r""" The generic kl-UCB policy for one-parameter exponential distributions. 28 | By default, it assumes Bernoulli arms. 29 | Note: using :math:`\log10(t)` and not :math:`\log(t)` for the KL-UCB index. 30 | Reference: [Garivier & Cappé - COLT, 2011]. 31 | """ 32 | 33 | def __str__(self): 34 | return r"kl-UCB({}{}{})".format("" if self.c == 1 else r"$c={:.3g}$, ".format(self.c), r"$\log_{10}$, ", self.klucb.__name__[5:]) 35 | 36 | def computeIndex(self, arm): 37 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 38 | 39 | .. 
math:: 40 | 41 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 42 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{c \log_{10}(t)}{N_k(t)} \right\},\\ 43 | I_k(t) &= U_k(t). 44 | 45 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 46 | and c is the parameter (default to 1). 47 | """ 48 | if self.pulls[arm] < 1: 49 | return float('+inf') 50 | else: 51 | # XXX We could adapt tolerance to the value of self.t 52 | return self.klucb(self.rewards[arm] / self.pulls[arm], self.c * log10(self.t) / self.pulls[arm], self.tolerance) 53 | 54 | def computeAllIndex(self): 55 | """ Compute the current indexes for all arms, in a vectorized manner.""" 56 | indexes = self.klucb_vect(self.rewards / self.pulls, self.c * np.log10(self.t) / self.pulls, self.tolerance) 57 | indexes[self.pulls < 1] = float('+inf') 58 | self.index[:] = indexes 59 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/klUCBloglog10.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The generic kl-UCB policy for one-parameter exponential distributions. 3 | By default, it assumes Bernoulli arms. 4 | Note: using :math:`\log10(t)` and not :math:`\log(t)` for the KL-UCB index. 5 | Reference: [Garivier & Cappé - COLT, 2011]. 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.5" 11 | 12 | from math import log10 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | # WARNING: this is a HUGE hack to fix a mystery bug on importing this policy 17 | from sys import path 18 | from os.path import dirname 19 | path.insert(0, '/'.join(dirname(__file__).split('/')[:-1])) 20 | try: 21 | from .klUCB import klUCB 22 | except ImportError: 23 | from klUCB import klUCB 24 | 25 | 26 | class klUCBloglog10(klUCB): 27 | r""" The generic kl-UCB policy for one-parameter exponential distributions. 28 | By default, it assumes Bernoulli arms. 29 | Note: using :math:`\log10(t)` and not :math:`\log(t)` for the KL-UCB index. 30 | Reference: [Garivier & Cappé - COLT, 2011]. 31 | """ 32 | 33 | def __str__(self): 34 | return r"kl-UCB({}{}{})".format("" if self.c == 1 else r"$c={:.3g}$, ".format(self.c), r"$\log_{10}\log_{10}$, ", self.klucb.__name__[5:]) 35 | 36 | def computeIndex(self, arm): 37 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 38 | 39 | .. math:: 40 | 41 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 42 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{\log_{10}(t) + c \log(\max(1, \log_{10}(t)))}{N_k(t)} \right\},\\ 43 | I_k(t) &= U_k(t). 44 | 45 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 46 | and c is the parameter (default to 1). 
47 | """ 48 | if self.pulls[arm] < 1: 49 | return float('+inf') 50 | else: 51 | # XXX We could adapt tolerance to the value of self.t 52 | return self.klucb(self.rewards[arm] / self.pulls[arm], (log10(self.t) + self.c * log10(max(1, log10(self.t)))) / self.pulls[arm], self.tolerance) 53 | 54 | def computeAllIndex(self): 55 | """ Compute the current indexes for all arms, in a vectorized manner.""" 56 | indexes = self.klucb_vect(self.rewards / self.pulls, (np.log10(self.t) + self.c * np.log10(np.maximum(1., np.log10(self.t)))) / self.pulls, self.tolerance) 57 | indexes[self.pulls < 1] = float('+inf') 58 | self.index[:] = indexes 59 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Experimentals/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Basic setup.py to compile a Cython extension. 4 | It is used to compile the ``UCBoost_faster_cython``, ``UCBoost_cython``, ``UCBcython`` extension, by running:: 5 | 6 | $ python setup.py build_ext --inplace 7 | 8 | You can also use [pyximport](http://docs.cython.org/en/latest/src/tutorial/cython_tutorial.html#pyximport-cython-compilation-for-developers) to import the ``kullback_cython`` module transparently: 9 | 10 | >>> import pyximport; pyximport.install() 11 | >>> import kullback_cython as kullback 12 | >>> # then use kullback.klucbBern or others, as if they came from the pure Python version! 13 | """ 14 | from distutils.core import setup 15 | from distutils.extension import Extension 16 | from Cython.Build import cythonize 17 | 18 | extensions = [ 19 | # Extension("UCBoost_faster_cython", ["UCBoost_faster_cython.pyx"]), 20 | # XXX also build the extension with full name? 21 | Extension("SMPyBandits.Policies.Experimentals.UCBoost_faster_cython", ["UCBoost_faster_cython.pyx"]), 22 | # Extension("UCBoost_cython", ["UCBoost_cython.pyx"]), 23 | # XXX also build the extension with full name? 24 | Extension("SMPyBandits.Policies.Experimentals.UCBoost_cython", ["UCBoost_cython.pyx"]), 25 | # Extension("UCBcython", ["UCBcython.pyx"]), 26 | # XXX also build the extension with full name? 27 | Extension("SMPyBandits.Policies.Experimentals.UCBcython", ["UCBcython.pyx"]), 28 | ] 29 | 30 | setup( 31 | ext_modules = cythonize(extensions, compiler_directives={ 32 | 'embedsignature': True, 33 | 'language_level': 3, 34 | 'warn.undeclared': True, 35 | 'warn.unreachable': True, 36 | 'warn.maybe_uninitialized': True, 37 | 'warn.unused': True, 38 | 'warn.unused_arg': True, 39 | 'warn.unused_result': True, 40 | 'warn.multiple_declarators': True, 41 | }) 42 | ) 43 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/GreedyOracle.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Julien Seznec 3 | 4 | Oracle and near-minimax policy for rotting bandits without noise. 5 | 6 | Reference: [Heidari et al., 2016, https://www.ijcai.org/Proceedings/16/Papers/224.pdf] 7 | Tight Policy Regret Bounds for Improving and Decaying Bandits. 8 | Hoda Heidari, Michael Kearns, Aaron Roth. 9 | International Joint Conference on Artificial Intelligence (IJCAI) 2016, 1562. 10 | """ 11 | from .IndexPolicy import IndexPolicy 12 | import numpy as np 13 | 14 | class GreedyPolicy(IndexPolicy): 15 | """ 16 | Greedy Policy for rotting bandits (A2 in the reference below). 17 | Selects arm with best last value. 
18 | Reference: [Heidari et al., 2016, https://www.ijcai.org/Proceedings/16/Papers/224.pdf] 19 | """ 20 | def __init__(self, nbArms): 21 | super(GreedyPolicy, self).__init__(nbArms) 22 | self.last_pull = [np.inf for _ in range(nbArms)] 23 | 24 | def getReward(self, arm, reward): 25 | super(GreedyPolicy, self).getReward(arm, reward) 26 | self.last_pull[arm] = reward 27 | 28 | def computeAllIndex(self): 29 | return self.last_pull 30 | 31 | def computeIndex(self, arm): 32 | """ Return the last observed value of this arm (the index used by the greedy policy). """ 33 | return self.last_pull[arm] 34 | 35 | def startGame(self): 36 | super(GreedyPolicy, self).startGame() 37 | self.last_pull = [np.inf for _ in self.last_pull] 38 | 39 | 40 | class GreedyOracle(IndexPolicy): 41 | """ 42 | Greedy Oracle for rotting bandits (A0 in the reference below). 43 | Looks one step ahead and selects the arm with the best next value. 44 | Optimal policy for the rotting bandits problem. 45 | Reference: [Heidari et al., 2016, https://www.ijcai.org/Proceedings/16/Papers/224.pdf] 46 | """ 47 | def __init__(self, nbArms, arms): 48 | super(GreedyOracle, self).__init__(nbArms) 49 | self.arms = arms 50 | 51 | def computeIndex(self, arm): 52 | return self.arms[arm].mean 53 | 54 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/H_UCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function # Python 2 compatibility 3 | 4 | __author__ = "SlyJabiru" 5 | __version__ = "0.1" 6 | 7 | 8 | from math import sqrt, log 9 | import numpy as np 10 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 11 | 12 | 13 | try: 14 | from .StrategicIndexPolicy import StrategicIndexPolicy 15 | except ImportError: 16 | from StrategicIndexPolicy import StrategicIndexPolicy 17 | 18 | 19 | class H_UCB(StrategicIndexPolicy): 20 | def computeAgentIndex(self, agent): 21 | if self.agentPulls[agent] < 1: 22 | return float('+inf') 23 | else: 24 | return (self.agentRewards[agent] / self.agentPulls[agent]) + sqrt((2 * log(self.t)) / self.agentPulls[agent]) 25 | 26 | def computeArmIndex(self, arm): 27 | if self.armPulls[arm] < 1: 28 | return float('+inf') 29 | else: 30 | armPossession = np.cumsum(self.nbArmsPerAgents) - 1 31 | temp = (armPossession >= arm) 32 | agent = np.where(temp)[0][0] 33 | return (self.armRewards[arm] / self.armPulls[arm]) + sqrt((2 * log(self.agentPulls[agent])) / self.armPulls[arm]) 34 | 35 | def computeAllIndex(self): 36 | """ Compute the current indices for all agents and all arms, in a vectorized manner.""" 37 | agentIndices = (self.agentRewards / self.agentPulls) + np.sqrt((2 * np.log(self.t)) / self.agentPulls) 38 | 39 | agentPullsRepeated = np.repeat(self.agentPulls, self.nbArmsPerAgents) 40 | armIndices = (self.armRewards / self.armPulls) + np.sqrt((2 * np.log(agentPullsRepeated)) / self.armPulls) 41 | 42 | agentIndices[self.agentPulls < 1] = float('+inf') 43 | armIndices[self.armPulls < 1] = float('+inf') 44 | 45 | self.agentIndex[:] = agentIndices 46 | self.armIndex[:] = armIndices 47 | 48 | 49 | # --- Debugging 50 | 51 | # if __name__ == "__main__": 52 | # # Code for debugging purposes.
53 | # from doctest import testmod 54 | # print("\nTesting automatically all the docstring written in each functions of this module :") 55 | # testmod(verbose=True) 56 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/MOSS.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The MOSS policy for bounded bandits. 3 | Reference: [Audibert & Bubeck, 2010](http://www.jmlr.org/papers/volume11/audibert10a/audibert10a.pdf). 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | import numpy as np 11 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 12 | 13 | try: 14 | from .IndexPolicy import IndexPolicy 15 | except ImportError: 16 | from IndexPolicy import IndexPolicy 17 | 18 | 19 | class MOSS(IndexPolicy): 20 | """ The MOSS policy for bounded bandits. 21 | Reference: [Audibert & Bubeck, 2010](http://www.jmlr.org/papers/volume11/audibert10a/audibert10a.pdf). 22 | """ 23 | 24 | def computeIndex(self, arm): 25 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, if there is K arms: 26 | 27 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\max\left(0, \frac{\log\left(\frac{t}{K N_k(t)}\right)}{N_k(t)}\right)}. 28 | """ 29 | if self.pulls[arm] < 1: 30 | return float('+inf') 31 | else: 32 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(max(0, np.log(self.t / (self.nbArms * self.pulls[arm]))) / self.pulls[arm]) 33 | 34 | def computeAllIndex(self): 35 | """ Compute the current indexes for all arms, in a vectorized manner.""" 36 | indexes = (self.rewards / self.pulls) + np.sqrt(np.maximum(0., np.log(self.t / (self.nbArms * self.pulls))) / self.pulls) 37 | indexes[self.pulls < 1] = float('+inf') 38 | self.index[:] = indexes 39 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/MOSSAnytime.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The MOSS-Anytime policy for bounded bandits, without knowing the horizon (and no doubling trick). 3 | Reference: [Degenne & Perchet, 2016](http://proceedings.mlr.press/v48/degenne16.pdf). 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.9" 9 | 10 | import numpy as np 11 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 12 | 13 | try: 14 | from .MOSS import MOSS 15 | except ImportError: 16 | from MOSS import MOSS 17 | 18 | 19 | #: Default value for the parameter :math:`\alpha` for the MOSS-Anytime algorithm. 20 | ALPHA = 1.0 21 | 22 | 23 | class MOSSAnytime(MOSS): 24 | """ The MOSS-Anytime policy for bounded bandits, without knowing the horizon (and no doubling trick). 25 | Reference: [Degenne & Perchet, 2016](http://proceedings.mlr.press/v48/degenne16.pdf). 26 | """ 27 | 28 | def __init__(self, nbArms, alpha=ALPHA, lower=0., amplitude=1.): 29 | super(MOSSAnytime, self).__init__(nbArms, lower=lower, amplitude=amplitude) 30 | self.alpha = alpha #: Parameter :math:`\alpha \geq 0` for the computations of the index. Optimal value seems to be :math:`1.35`. 
31 | 32 | def __str__(self): 33 | return r"MOSS-Anytime($\alpha={}$)".format(self.alpha) 34 | 35 | def computeIndex(self, arm): 36 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, if there is K arms: 37 | 38 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\left(\frac{1+\alpha}{2}\right) \max\left(0, \frac{\log\left(\frac{t}{K N_k(t)}\right)}{N_k(t)}\right)}. 39 | """ 40 | if self.pulls[arm] < 1: 41 | return float('+inf') 42 | else: 43 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(((1. + self.alpha) / 2.) * max(0, np.log(self.t / (self.nbArms * self.pulls[arm]))) / self.pulls[arm]) 44 | 45 | def computeAllIndex(self): 46 | """ Compute the current indexes for all arms, in a vectorized manner.""" 47 | indexes = (self.rewards / self.pulls) + np.sqrt(((1. + self.alpha) / 2.) * np.maximum(0., np.log(self.t / (self.nbArms * self.pulls))) / self.pulls) 48 | indexes[self.pulls < 1] = float('+inf') 49 | self.index[:] = indexes 50 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/MOSSExperimental.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The MOSS-Experimental policy for bounded bandits, without knowing the horizon (and no doubling trick). 3 | Reference: [Degenne & Perchet, 2016](http://proceedings.mlr.press/v48/degenne16.pdf). 4 | 5 | .. warning:: Nothing was proved for this heuristic! 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | from numpy import sqrt, log 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | try: 17 | from .MOSS import MOSS 18 | except ImportError: 19 | from MOSS import MOSS 20 | 21 | 22 | class MOSSExperimental(MOSS): 23 | """ The MOSS-Experimental policy for bounded bandits, without knowing the horizon (and no doubling trick). 24 | Reference: [Degenne & Perchet, 2016](http://proceedings.mlr.press/v48/degenne16.pdf). 25 | """ 26 | 27 | def __str__(self): 28 | return "MOSS-Experimental" 29 | 30 | def computeIndex(self, arm): 31 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, if there is K arms: 32 | 33 | .. math:: 34 | 35 | I_k(t) &= \frac{X_k(t)}{N_k(t)} + \sqrt{ \max\left(0, \frac{\log\left(\frac{t}{\hat{H}(t)}\right)}{N_k(t)}\right)},\\ 36 | \text{where}\;\; \hat{H}(t) &:= \begin{cases} 37 | \sum\limits_{j=1, N_j(t) < \sqrt{t}}^{K} N_j(t) & \;\text{if it is}\; > 0,\\ 38 | K N_k(t) & \;\text{otherwise}\; 39 | \end{cases} 40 | 41 | .. note:: In the article, the authors do not explain this subtlety, and I don't see an argument to justify that at anytime, :math:`\hat{H}(t) > 0` ie to justify that there is always some arms :math:`j` such that :math:`0 < N_j(t) < \sqrt{t}`. 
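A small worked example of this estimator (numbers chosen only for illustration): with :math:`K = 3` arms at :math:`t = 100` (so :math:`\sqrt{t} = 10`) and pull counts :math:`(50, 45, 5)`, only the third arm satisfies :math:`N_j(t) < \sqrt{t}`, hence :math:`\hat{H}(t) = 5`; with counts :math:`(50, 40, 10)` no arm qualifies, and the fallback :math:`K N_k(t)` is used instead.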
42 | """ 43 | if self.pulls[arm] < 1: 44 | return float('+inf') 45 | else: 46 | pulls_of_suboptimal_arms = np.sum(self.pulls[self.pulls < np.sqrt(self.t)]) 47 | if pulls_of_suboptimal_arms > 0: 48 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(0.5 * max(0, np.log(self.t / pulls_of_suboptimal_arms)) / self.pulls[arm]) 49 | else: 50 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(0.5 * max(0, np.log(self.t / (self.nbArms * self.pulls[arm]))) / self.pulls[arm]) 51 | 52 | def computeAllIndex(self): 53 | """ Compute the current indexes for all arms, in a vectorized manner.""" 54 | pulls_of_suboptimal_arms = np.sum(self.pulls[self.pulls < np.sqrt(self.t)]) 55 | if pulls_of_suboptimal_arms > 0: 56 | indexes = (self.rewards / self.pulls) + np.sqrt(0.5 * np.maximum(0, np.log(self.t / pulls_of_suboptimal_arms)) / self.pulls) 57 | else: 58 | indexes = (self.rewards / self.pulls) + np.sqrt(0.5 * np.maximum(0, np.log(self.t / (self.nbArms * self.pulls))) / self.pulls) 59 | indexes[self.pulls < 1] = float('+inf') 60 | self.index[:] = indexes 61 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/MOSSH.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The MOSS-H policy for bounded bandits, with knowing the horizon. 3 | Reference: [Audibert & Bubeck, 2010](http://www.jmlr.org/papers/volume11/audibert10a/audibert10a.pdf). 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.5" 9 | 10 | import numpy as np 11 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 12 | 13 | try: 14 | from .MOSS import MOSS 15 | except ImportError: 16 | from MOSS import MOSS 17 | 18 | 19 | class MOSSH(MOSS): 20 | """ The MOSS-H policy for bounded bandits, with knowing the horizon. 21 | Reference: [Audibert & Bubeck, 2010](http://www.jmlr.org/papers/volume11/audibert10a/audibert10a.pdf). 22 | """ 23 | 24 | def __init__(self, nbArms, horizon=None, lower=0., amplitude=1.): 25 | super(MOSSH, self).__init__(nbArms, lower=lower, amplitude=amplitude) 26 | self.horizon = int(horizon) #: Parameter :math:`T` = known horizon of the experiment. 27 | 28 | def __str__(self): 29 | return r"MOSS-H($T={}$)".format(self.horizon) 30 | 31 | def computeIndex(self, arm): 32 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, if there is K arms: 33 | 34 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\max\left(0, \frac{\log\left(\frac{T}{K N_k(t)}\right)}{N_k(t)}\right)}. 35 | """ 36 | if self.pulls[arm] < 1: 37 | return float('+inf') 38 | else: 39 | return (self.rewards[arm] / self.pulls[arm]) + np.sqrt(max(0, np.log(self.horizon / (self.nbArms * self.pulls[arm]))) / self.pulls[arm]) 40 | 41 | def computeAllIndex(self): 42 | """ Compute the current indexes for all arms, in a vectorized manner.""" 43 | indexes = (self.rewards / self.pulls) + np.sqrt(np.maximum(0., np.log(self.horizon / (self.nbArms * self.pulls))) / self.pulls) 44 | indexes[self.pulls < 1] = float('+inf') 45 | self.index[:] = indexes 46 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Makefile: -------------------------------------------------------------------------------- 1 | # Basic Makefile to compile a Cython extension. 
2 | # It is used to compile the cython_extensions extension, by running 'make cython_extensions' 3 | 4 | cython_extensions3: cython_extensions 5 | cython_extensions: 6 | python3 setup.py build_ext --inplace 7 | -cp -vf SMPyBandits/Policies/*.so ./ 8 | -chmod -x ./*.so 9 | -chmod g-w ./*.so 10 | -chmod o-w ./*.so 11 | -ls -larth ./*.so 12 | -rm -vfr ./build ./*.c 13 | # -mv -vf ./SMPyBandits /tmp/ 14 | 15 | cython_extensions2: 16 | python2 setup.py build_ext --inplace 17 | -cp -vf SMPyBandits/Policies/*.so ./ 18 | -chmod -x ./*.so 19 | -chmod g-w ./*.so 20 | -chmod o-w ./*.so 21 | -ls -larth ./*.so 22 | -rm -vfr ./build ./*.c 23 | # -mv -vf ./SMPyBandits /tmp/ 24 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/OCUCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Optimally Confident UCB (OC-UCB) policy for bounded stochastic bandits, with sub-Gaussian noise. 3 | 4 | - Reference: [Lattimore, 2016](https://arxiv.org/pdf/1603.08661.pdf). 5 | - There is also a horizon-dependent version, :class:`OCUCBH.OCUCBH`, from [Lattimore, 2015](https://arxiv.org/pdf/1507.07880.pdf). 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | from math import exp, sqrt, log 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | try: 17 | from .UCB import UCB 18 | except ImportError: 19 | from UCB import UCB 20 | 21 | #: Default value for parameter :math:`\eta > 1` for OCUCB. 22 | ETA = 2 23 | 24 | #: Default value for parameter :math:`\rho \in (1/2, 1]` for OCUCB. 25 | RHO = 1 26 | 27 | 28 | class OCUCB(UCB): 29 | """ The Optimally Confident UCB (OC-UCB) policy for bounded stochastic bandits, with sub-Gaussian noise. 30 | 31 | - Reference: [Lattimore, 2016](https://arxiv.org/pdf/1603.08661.pdf). 32 | """ 33 | 34 | def __init__(self, nbArms, eta=ETA, rho=RHO, lower=0., amplitude=1.): 35 | super(OCUCB, self).__init__(nbArms, lower=lower, amplitude=amplitude) 36 | assert eta > 1, "Error: parameter 'eta' for OCUCB algorithm has to be > 1." # DEBUG 37 | self.eta = eta #: Parameter :math:`\eta > 1`. 38 | assert 0.5 < rho <= 1, "Error: parameter 'rho' for OCUCB algorithm has to be in (1/2, 1]." # DEBUG 39 | self.rho = rho #: Parameter :math:`\rho \in (1/2, 1]`. 40 | 41 | def __str__(self): 42 | return r"OC-UCB($\eta={:.3g}$, $\rho={:.3g}$)".format(self.eta, self.rho) 43 | 44 | def _Bterm(self, k): 45 | r""" Compute the extra term :math:`B_k(t)` as follows: 46 | 47 | .. math:: 48 | 49 | B_k(t) &= \max\Big\{ \exp(1), \log(t), t \log(t) / C_k(t) \Big\},\\ 50 | \text{where}\; C_k(t) &= \sum_{j=1}^{K} \min\left\{ T_k(t), T_j(t)^{\rho} T_k(t)^{1 - \rho} \right\} 51 | """ 52 | t = self.t 53 | T_ = self.pulls 54 | C_kt = sum(min(T_[k], (T_[j] ** self.rho) * (T_[k] ** (1. - self.rho))) for j in range(self.nbArms)) 55 | return max([exp(1), log(t), t * log(t) / C_kt]) 56 | 57 | def _Bterms(self): 58 | r""" Compute all the extra terms, :math:`B_k(t)` for each arm k, in a naive manner, not optimized to be vectorial, but it works.""" 59 | return np.array([self._Bterm(k) for k in range(self.nbArms)]) 60 | 61 | def computeIndex(self, arm): 62 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 63 | 64 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{2 \eta \log(B_k(t))}{N_k(t)}}. 
65 | 66 | - Where :math:`\eta` is a parameter of the algorithm, 67 | - And :math:`B_k(t)` is the additional term defined above. 68 | """ 69 | if self.pulls[arm] < 1: 70 | return float('+inf') 71 | else: 72 | return (self.rewards[arm] / self.pulls[arm]) + sqrt(2 * self.eta * log(self._Bterm(arm)) / self.pulls[arm]) 73 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/PHE.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The PHE, Perturbed-History Exploration, policy for bounded bandits. 3 | 4 | - Reference: [[Perturbed-History Exploration in Stochastic Multi-Armed Bandits, by Branislav Kveton, Csaba Szepesvari, Mohammad Ghavamzadeh, Craig Boutilier, 26 Feb 2019, arXiv:1902.10089]](https://arxiv.org/abs/1902.10089) 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.9" 10 | 11 | try: 12 | from .IndexPolicy import IndexPolicy 13 | except ImportError: 14 | from IndexPolicy import IndexPolicy 15 | 16 | from math import ceil 17 | import numpy as np 18 | 19 | #: By default, :math:`a` the perturbation scale in PHE is 1, that is, at current time step t, if there is :math:`s = T_{i,t-1}` samples of arm i, PHE generates :math:`s` pseudo-rewards (of mean :math:`1/2`) 20 | DEFAULT_PERTURBATION_SCALE = 1.0 21 | 22 | 23 | class PHE(IndexPolicy): 24 | """ The PHE, Perturbed-History Exploration, policy for bounded bandits. 25 | 26 | - Reference: [[Perturbed-History Exploration in Stochastic Multi-Armed Bandits, by Branislav Kveton, Csaba Szepesvari, Mohammad Ghavamzadeh, Craig Boutilier, 26 Feb 2019, arXiv:1902.10089]](https://arxiv.org/abs/1902.10089) 27 | 28 | - They prove that PHE achieves a regret of :math:`\mathcal{O}(K \Delta^{-1} \log(T))` regret for horizon :math:`T`, and if :math:`\Delta` is the minimum gap between the expected rewards of the optimal and suboptimal arms, for :math:`a > 1`. 29 | - Note that the limit case of :math:`a=0` gives the Follow-the-Leader algorithm (FTL), known to fail. 30 | """ 31 | def __init__(self, nbArms, perturbation_scale=DEFAULT_PERTURBATION_SCALE, lower=0., amplitude=1.): 32 | assert perturbation_scale > 0, "Error: for PHE class, the parameter perturbation_scale should be > 0, it was {}.".format(perturbation_scale) # DEBUG 33 | self.perturbation_scale = perturbation_scale #: Perturbation scale, denoted :math:`a` in their paper. Should be a float or int number. With :math:`s` current samples, :math:`\lceil a s \rceil` additional pseudo-rewards are generated. 
34 | super(PHE, self).__init__(nbArms, lower=lower, amplitude=amplitude) 35 | 36 | def __str__(self): 37 | return r"PHE($a={:.3g}$)".format(self.perturbation_scale) 38 | 39 | def computeIndex(self, arm): 40 | """ Compute a randomized index by adding :math:`a` pseudo-rewards (of mean :math:`1/2`) to the current observations of this arm.""" 41 | s = self.pulls[arm] 42 | if s <= 0: 43 | return float('+inf') 44 | V_is = self.rewards[arm] 45 | number_of_perturbation = ceil(self.perturbation_scale * s) 46 | U_is = np.random.binomial(number_of_perturbation, 0.5) 47 | perturbated_mean = (V_is + U_is) / (s + number_of_perturbation) 48 | return perturbated_mean 49 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/Gamma.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Manipulate a Gamma posterior. No need for tricks to handle non-binary rewards. 3 | 4 | - See https://en.wikipedia.org/wiki/Gamma_distribution#Conjugate_prior 5 | - And https://en.wikipedia.org/wiki/Conjugate_prior#Continuous_distributions 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Emilie Kaufmann, Lilian Besson" 10 | __version__ = "0.6" 11 | 12 | try: 13 | from numpy.random import gamma as gammavariate # Faster! Yes! 14 | except ImportError: 15 | from random import gammavariate 16 | 17 | from scipy.special import gdtrix 18 | 19 | 20 | # Local imports 21 | from .Posterior import Posterior 22 | 23 | 24 | class Gamma(Posterior): 25 | """ Manipulate a Gamma posterior.""" 26 | 27 | def __init__(self, k=1, lmbda=1): 28 | r"""Create a Gamma posterior, :math:`\Gamma(k, \lambda)`, with :math:`k=1` and :math:`\lambda=1` by default.""" 29 | assert k > 0, "Error: parameter 'k' for Beta posterior has to be > 0." 30 | self._k = k 31 | self.k = k #: Parameter :math:`k` 32 | assert lmbda > 0, "Error: parameter 'lmbda' for Beta posterior has to be > 0." 33 | self._lmbda = lmbda 34 | self.lmbda = lmbda #: Parameter :math:`\lambda` 35 | 36 | def __str__(self): 37 | return "Gamma({}, {})".format(self.k, self.lmbda) 38 | 39 | def reset(self, k=None, lmbda=None): 40 | """Reset k and lmbda, both to 1 as when creating a new default Gamma.""" 41 | if k is None: 42 | self.k = self._k 43 | if lmbda is None: 44 | self.lmbda = self._lmbda 45 | 46 | def sample(self): 47 | """Get a random sample from the Beta posterior (using :func:`numpy.random.gammavariate`). 48 | 49 | - Used only by :class:`Thompson` Sampling and :class:`AdBandits` so far. 50 | """ 51 | return gammavariate(self.k, 1. / self.lmbda) 52 | 53 | def quantile(self, p): 54 | """Return the p quantile of the Gamma posterior (using :func:`scipy.stats.gdtrix`). 55 | 56 | - Used only by :class:`BayesUCB` and :class:`AdBandits` so far. 57 | """ 58 | return gdtrix(self.k, 1. 
/ self.lmbda, p) 59 | 60 | def mean(self): 61 | """Compute the mean of the Gamma posterior (should be useless).""" 62 | return self.k / float(self.lmbda) 63 | 64 | def forget(self, obs): 65 | """Forget the last observation.""" 66 | # print("Info: calling Gamma.forget() with obs = {} ...".format(obs)) # DEBUG 67 | self.k += self._k 68 | self.lmbda += obs 69 | 70 | def update(self, obs): 71 | """Add an observation: increase k by k0, and lmbda by obs (do not have to be normalized).""" 72 | # print("Info: calling Gamma.update() with obs = {} ...".format(obs)) # DEBUG 73 | self.k += self._k 74 | self.lmbda += obs 75 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/Posterior.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Base class for a posterior. Cf. http://chercheurs.lille.inria.fr/ekaufman/NIPS13 Fig.1 for a list of posteriors. """ 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.6" 7 | 8 | 9 | class Posterior(object): 10 | """ Manipulate posteriors experiments.""" 11 | 12 | def __init__(self, *args, **kwargs): 13 | raise NotImplementedError("This method __init__(self, *args, **kwargs) has to be implemented in the child class inheriting from Posterior.") 14 | 15 | def reset(self, *args, **kwargs): 16 | """Reset posterior, new experiment.""" 17 | raise NotImplementedError("This method reset(self, *args, **kwargs) has to be implemented in the child class inheriting from Posterior.") 18 | 19 | def sample(self): 20 | """Sample from the posterior.""" 21 | raise NotImplementedError("This method sample(self) has to be implemented in the child class inheriting from Posterior.") 22 | 23 | def quantile(self, p): 24 | """p quantile from the posterior.""" 25 | raise NotImplementedError("This method quantile(self, p) has to be implemented in the child class inheriting from Posterior.") 26 | 27 | def mean(self): 28 | """Mean of the posterior.""" 29 | raise NotImplementedError("This method mean(self) has to be implemented in the child class inheriting from Posterior.") 30 | 31 | def forget(self, obs): 32 | """Forget last observation (never used).""" 33 | raise NotImplementedError("This method forget(self, obs) has to be implemented in the child class inheriting from Posterior.") 34 | 35 | def update(self, obs): 36 | """Update posterior with this observation.""" 37 | raise NotImplementedError("This method update(self, obs) has to be implemented in the child class inheriting from Posterior.") 38 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/README.md: -------------------------------------------------------------------------------- 1 | # [Posteriors for Bayesian Index policies:](https://smpybandits.github.io/docs/Policies.Posterior.html) 2 | > See here the documentation: [docs/Policies.Posterior](https://smpybandits.github.io/docs/Policies.Posterior.html) 3 | 4 | 5 | - [`Beta`](Beta.py) is the default for [`Thompson`](Thompson.py) Sampling and [`BayesUCB`](BayesUCB.py), ideal for Bernoulli experiments, 6 | - [`Gamma`](Gamma.py) and [`Gauss`](Gauss.py) are more suited for respectively Poisson and Gaussian arms. 
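A minimal usage sketch (illustrative only, based on the `Gamma` class above; it assumes the package is importable as `SMPyBandits`, e.g. installed or run from the repository root):

```python
from SMPyBandits.Policies.Posterior import Gamma

posterior = Gamma(k=1, lmbda=1)   # Gamma(k=1, lambda=1) prior
for obs in (0.5, 1.2, 0.8):       # non-negative observed rewards
    posterior.update(obs)         # k += 1 and lambda += obs (conjugate update)
print(posterior.mean())           # posterior mean, k / lambda
print(posterior.sample())         # one sample, as used by Thompson sampling
print(posterior.quantile(0.95))   # upper quantile, as used by BayesUCB
```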
-------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Posteriors for Bayesian Index policies: 3 | 4 | - :class:`Beta` is the default for :class:`Thompson` Sampling and :class:`BayesUCB`, ideal for Bernoulli experiments, 5 | - :class:`Gamma` and :class:`Gauss` are more suited for respectively Poisson and Gaussian arms, 6 | - :class:`DiscountedBeta` is the default for :class:`Policies.DiscountedThompson` Sampling, ideal for Bernoulli experiments on non stationary bandits. 7 | """ 8 | from __future__ import division, print_function # Python 2 compatibility 9 | 10 | __author__ = "Lilian Besson" 11 | __version__ = "0.9" 12 | 13 | # from .Posterior import Posterior 14 | 15 | from .Beta import Beta 16 | from .DiscountedBeta import DiscountedBeta 17 | from .Gamma import Gamma 18 | from .Gauss import Gauss 19 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Posterior/with_proba.py: -------------------------------------------------------------------------------- 1 | ../with_proba.py -------------------------------------------------------------------------------- /SMPyBandits/Policies/RCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The RCB, Randomized Confidence Bound, policy for bounded bandits. 3 | 4 | - Reference: [["On the Optimality of Perturbations in Stochastic and Adversarial Multi-armed Bandit Problems", by Baekjin Kim, Ambuj Tewari, arXiv:1902.00610]](https://arxiv.org/pdf/1902.00610.pdf) 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.9" 10 | 11 | try: 12 | from .RandomizedIndexPolicy import RandomizedIndexPolicy 13 | from .UCBalpha import UCBalpha 14 | except ImportError: 15 | from RandomizedIndexPolicy import RandomizedIndexPolicy 16 | from UCBalpha import UCBalpha 17 | 18 | 19 | class RCB(RandomizedIndexPolicy, UCBalpha): 20 | """ The RCB, Randomized Confidence Bound, policy for bounded bandits. 21 | 22 | - Reference: [["On the Optimality of Perturbations in Stochastic and Adversarial Multi-armed Bandit Problems", by Baekjin Kim, Ambuj Tewari, arXiv:1902.00610]](https://arxiv.org/pdf/1902.00610.pdf) 23 | """ 24 | # FIXME I should implement these RandomizedIndexPolicy variants in a more generic way! 25 | pass 26 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/RH_UCB_Temp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function # Python 2 compatibility 3 | 4 | __author__ = "SlyJabiru" 5 | __version__ = "0.1" 6 | 7 | import random 8 | from math import sqrt, log 9 | import numpy as np 10 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 
11 | 12 | 13 | try: 14 | from .StrategicIndexPolicy import StrategicIndexPolicy 15 | except ImportError: 16 | from StrategicIndexPolicy import StrategicIndexPolicy 17 | 18 | 19 | class RH_UCB_Temp(StrategicIndexPolicy): 20 | def __init__(self, nbArms, nbAgents, nbArmsPerAgents, 21 | lower=0., amplitude=1.): 22 | super(RH_UCB_Temp, self).__init__(nbArms, nbAgents, nbArmsPerAgents, 23 | lower=lower, amplitude=amplitude) 24 | 25 | def computeAgentIndex(self, agent): 26 | if self.agentPulls[agent] < 1: 27 | return float('+inf') 28 | else: 29 | return (self.agentRewards[agent] / self.agentPulls[agent]) + sqrt(sqrt(self.t) * log(self.t) / self.agentPulls[agent]) 30 | 31 | def computeArmIndex(self, arm): 32 | if self.armPulls[arm] < 1: 33 | return float('+inf') 34 | else: 35 | armPossession = np.cumsum(self.nbArmsPerAgents) - 1 36 | temp = (armPossession >= arm) 37 | agent = np.where(temp)[0][0] 38 | return (self.armRewards[arm] / self.armPulls[arm]) + sqrt((2 * log(self.agentPulls[agent])) / self.armPulls[arm]) 39 | 40 | def computeAllIndex(self): 41 | """ Compute the current indices for all agent and all arms, in a vectorized manner.""" 42 | agentIndices = (self.agentRewards / self.agentPulls) + np.sqrt(np.sqrt(self.t) * np.log(self.t) / self.agentPulls) 43 | 44 | agentPullsRepeated = np.repeat(self.agentPulls, self.nbArmsPerAgents) 45 | armIndices = (self.armRewards / self.armPulls) + np.sqrt((2 * np.log(agentPullsRepeated)) / self.armPulls) 46 | 47 | agentIndices[self.agentPulls < 1] = float('+inf') 48 | armIndices[self.armPulls < 1] = float('+inf') 49 | 50 | self.agentIndex[:] = agentIndices 51 | self.armIndex[:] = armIndices 52 | 53 | 54 | # --- Debugging 55 | 56 | # if __name__ == "__main__": 57 | # # Code for debugging purposes. 58 | # from doctest import testmod 59 | # print("\nTesting automatically all the docstring written in each functions of this module :") 60 | # testmod(verbose=True) 61 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/StrategicUCB2PhaseRobustDeprecated.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function # Python 2 compatibility 3 | 4 | __author__ = "SlyJabiru" 5 | __version__ = "0.1" 6 | 7 | 8 | from math import sqrt, log 9 | import numpy as np 10 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 
11 | 12 | 13 | try: 14 | from .StrategicIndexPolicy import StrategicIndexPolicy 15 | except ImportError: 16 | from StrategicIndexPolicy import StrategicIndexPolicy 17 | 18 | 19 | class StrategicUCB2PhaseRobustDeprecated(StrategicIndexPolicy): 20 | def computeAgentIndex(self, agent): 21 | if self.agentPulls[agent] < 1: 22 | return float('+inf') 23 | else: 24 | return (self.agentRewards[agent] / self.agentPulls[agent]) + sqrt(sqrt(self.t * log(self.t)) / self.agentPulls[agent]) 25 | 26 | def computeArmIndex(self, arm): 27 | if self.armPulls[arm] < 1: 28 | return float('+inf') 29 | else: 30 | armPossession = np.cumsum(self.nbArmsPerAgents) - 1 31 | temp = (armPossession >= arm) 32 | agent = np.where(temp)[0][0] 33 | return (self.armRewards[arm] / self.armPulls[arm]) + sqrt((2 * log(self.agentPulls[agent])) / self.armPulls[arm]) 34 | 35 | def computeAllIndex(self): 36 | """ Compute the current indices for all agent and all arms, in a vectorized manner.""" 37 | agentIndices = (self.agentRewards / self.agentPulls) + np.sqrt(np.sqrt(self.t * np.log(self.t)) / self.agentPulls) 38 | 39 | agentPullsRepeated = np.repeat(self.agentPulls, self.nbArmsPerAgents) 40 | armIndices = (self.armRewards / self.armPulls) + np.sqrt((2 * np.log(agentPullsRepeated)) / self.armPulls) 41 | 42 | agentIndices[self.agentPulls < 1] = float('+inf') 43 | armIndices[self.armPulls < 1] = float('+inf') 44 | 45 | self.agentIndex[:] = agentIndices 46 | self.armIndex[:] = armIndices 47 | 48 | 49 | # --- Debugging 50 | 51 | # if __name__ == "__main__": 52 | # # Code for debugging purposes. 53 | # from doctest import testmod 54 | # print("\nTesting automatically all the docstring written in each functions of this module :") 55 | # testmod(verbose=True) 56 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/SuccessiveElimination.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Generic policy based on successive elimination, mostly useless except to maintain a clear hierarchy of inheritance. 3 | """ 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.9" 7 | 8 | from numpy import sqrt, log 9 | import numpy as np 10 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 11 | 12 | try: 13 | from .IndexPolicy import IndexPolicy 14 | except ImportError: 15 | from IndexPolicy import IndexPolicy 16 | 17 | 18 | class SuccessiveElimination(IndexPolicy): 19 | """ Generic policy based on successive elimination, mostly useless except to maintain a clear hierarchy of inheritance. 20 | """ 21 | 22 | def choice(self): 23 | r""" In policy based on successive elimination, choosing an arm is the same as choosing an arm from the set of active arms (``self.activeArms``) with method ``choiceFromSubSet``. 24 | """ 25 | return self.choiceFromSubSet(self.activeArms) 26 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/TakeFixedArm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ TakeFixedArm: always select a fixed arm. 3 | This is the perfect static policy if armIndex = bestArmIndex (not realistic, for test only). 
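Example (a tiny illustrative sketch)::

    >>> policy = TakeFixedArm(10, armIndex=3)
    >>> policy.choice()
    3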
4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.9" 9 | 10 | try: 11 | from .BasePolicy import BasePolicy 12 | except ImportError: 13 | from BasePolicy import BasePolicy 14 | 15 | 16 | class TakeFixedArm(BasePolicy): 17 | """ TakeFixedArm: always select a fixed arm. 18 | This is the perfect static policy if armIndex = bestArmIndex (not realistic, for test only). 19 | """ 20 | 21 | def __init__(self, nbArms, armIndex=None, lower=0., amplitude=1.): 22 | self.nbArms = nbArms #: Number of arms 23 | if armIndex is None: 24 | armIndex = 0 25 | self.armIndex = armIndex #: Fixed arm 26 | 27 | def __str__(self): 28 | return "TakeFixedArm({})".format(self.armIndex) 29 | 30 | def startGame(self): 31 | """Nothing to do.""" 32 | pass 33 | 34 | def getReward(self, arm, reward): 35 | """Nothing to do.""" 36 | pass 37 | 38 | def choice(self): 39 | """Always the same choice.""" 40 | return self.armIndex 41 | 42 | def choiceWithRank(self, rank=1): 43 | """ Ignore the rank.""" 44 | return self.choice() 45 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/TakeRandomFixedArm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ TakeRandomFixedArm: always select a fixed arm. 3 | This is the perfect static policy if armIndex = bestArmIndex (not realistic, for test only). 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.9" 9 | 10 | import numpy as np 11 | import numpy.random as rn 12 | 13 | try: 14 | from .TakeFixedArm import TakeFixedArm 15 | except ImportError: 16 | from TakeFixedArm import TakeFixedArm 17 | 18 | 19 | class TakeRandomFixedArm(TakeFixedArm): 20 | """ TakeRandomFixedArm: first selects a random sub-set of arms, then always select from it. """ 21 | 22 | def __init__(self, nbArms, lower=0., amplitude=1., nbArmIndexes=None): 23 | self.nbArms = nbArms #: Number of arms 24 | #: Get the number of arms, randomly! 25 | if nbArmIndexes is None: 26 | nbArmIndexes = rn.randint(low=1, high=1 + int(nbArms / 2.)) 27 | #: Fix the set of arms 28 | self.armIndexes = list(rn.choice(np.arange(nbArms), size=nbArmIndexes, replace=False)) 29 | 30 | def __str__(self): 31 | return "TakeRandomFixedArm({})".format(self.armIndexes) 32 | 33 | def choice(self): 34 | """Uniform choice from armIndexes.""" 35 | return rn.choice(self.armIndexes) 36 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Thompson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The Thompson (Bayesian) index policy. 3 | 4 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 5 | - Reference: [Thompson - Biometrika, 1933]. 6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Olivier Cappé, Aurélien Garivier, Emilie Kaufmann, Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | try: 13 | from .BayesianIndexPolicy import BayesianIndexPolicy 14 | except (ImportError, SystemError): 15 | from BayesianIndexPolicy import BayesianIndexPolicy 16 | 17 | 18 | class Thompson(BayesianIndexPolicy): 19 | r"""The Thompson (Bayesian) index policy. 20 | 21 | - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm. 
22 | - Prior is initially flat, i.e., :math:`a=\alpha_0=1` and :math:`b=\beta_0=1`. 23 | 24 | - A non-flat prior for each arm can be given with parameters ``a`` and ``b``, for instance:: 25 | 26 | nbArms = 2 27 | prior_failures = a = 100 28 | prior_successes = b = 50 29 | policy = Thompson(nbArms, a=a, b=b) 30 | np.mean([policy.choice() for _ in range(1000)]) # 0.515 ~= 0.5: each arm has same prior! 31 | 32 | - A different prior for each arm can be given with parameters ``params_for_each_posterior``, for instance:: 33 | 34 | nbArms = 2 35 | params0 = { 'a': 10, 'b': 5} # mean 1/3 36 | params1 = { 'a': 5, 'b': 10} # mean 2/3 37 | params = [params0, params1] 38 | policy = Thompson(nbArms, params_for_each_posterior=params) 39 | np.mean([policy.choice() for _ in range(1000)]) # 0.9719 ~= 1: arm 1 is better than arm 0 ! 40 | 41 | - Reference: [Thompson - Biometrika, 1933]. 42 | """ 43 | 44 | def __str__(self): 45 | return "Thompson Sampling" 46 | 47 | def computeIndex(self, arm): 48 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, giving :math:`S_k(t)` rewards of 1, by sampling from the Beta posterior: 49 | 50 | .. math:: 51 | A(t) &\sim U(\arg\max_{1 \leq k \leq K} I_k(t)),\\ 52 | I_k(t) &\sim \mathrm{Beta}(1 + \tilde{S_k}(t), 1 + \tilde{N_k}(t) - \tilde{S_k}(t)). 53 | """ 54 | return self.posterior[arm].sample() 55 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB policy for bounded bandits. 3 | 4 | - Reference: [Lai & Robbins, 1985]. 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.1" 10 | 11 | from math import sqrt, log 12 | import numpy as np 13 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 14 | 15 | try: 16 | from .IndexPolicy import IndexPolicy 17 | except ImportError: 18 | from IndexPolicy import IndexPolicy 19 | 20 | 21 | class UCB(IndexPolicy): 22 | """ The UCB policy for bounded bandits. 23 | 24 | - Reference: [Lai & Robbins, 1985]. 25 | """ 26 | 27 | def computeIndex(self, arm): 28 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 29 | 30 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{2 \log(t)}{N_k(t)}}. 31 | """ 32 | if self.pulls[arm] < 1: 33 | return float('+inf') 34 | else: 35 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((2 * log(self.t)) / self.pulls[arm]) 36 | 37 | def computeAllIndex(self): 38 | """ Compute the current indexes for all arms, in a vectorized manner.""" 39 | indexes = (self.rewards / self.pulls) + np.sqrt((2 * np.log(self.t)) / self.pulls) 40 | indexes[self.pulls < 1] = float('+inf') 41 | self.index[:] = indexes 42 | 43 | 44 | # --- Debugging 45 | 46 | if __name__ == "__main__": 47 | # Code for debugging purposes. 48 | from doctest import testmod 49 | print("\nTesting automatically all the docstring written in each functions of this module :") 50 | testmod(verbose=True) 51 | 52 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBH.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB-H policy for bounded bandits, with knowing the horizon. 3 | Reference: [Audibert et al. 09]. 
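For instance (illustrative numbers only): with a known horizon :math:`T = 1000`, :math:`\alpha = 4`, an arm pulled :math:`N_k(t) = 20` times with cumulated reward :math:`X_k(t) = 10` gets the index :math:`0.5 + \sqrt{4 \log(1000) / (2 \times 20)} \approx 1.33`, which no longer depends on the current time :math:`t`.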
4 | """ 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.6" 8 | 9 | from numpy import sqrt, log 10 | import numpy as np 11 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 12 | 13 | try: 14 | from .UCBalpha import UCBalpha, ALPHA 15 | except ImportError: 16 | from UCBalpha import UCBalpha, ALPHA 17 | 18 | 19 | class UCBH(UCBalpha): 20 | """ The UCB-H policy for bounded bandits, with knowing the horizon. 21 | Reference: [Audibert et al. 09]. 22 | """ 23 | 24 | def __init__(self, nbArms, horizon=None, alpha=ALPHA, lower=0., amplitude=1.): 25 | super(UCBH, self).__init__(nbArms, lower=lower, amplitude=amplitude) 26 | self.horizon = int(horizon) #: Parameter :math:`T` = known horizon of the experiment. 27 | self.alpha = alpha #: Parameter alpha 28 | 29 | def __str__(self): 30 | return r"UCB-H($T={}$, $\alpha={:.3g}$)".format(self.horizon, self.alpha) 31 | 32 | def computeIndex(self, arm): 33 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 34 | 35 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{\alpha \log(T)}{2 N_k(t)}}. 36 | """ 37 | if self.pulls[arm] < 1: 38 | return float('+inf') 39 | else: 40 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((self.alpha * log(self.horizon)) / (2 * self.pulls[arm])) 41 | 42 | def computeAllIndex(self): 43 | """ Compute the current indexes for all arms, in a vectorized manner.""" 44 | indexes = (self.rewards / self.pulls) + np.sqrt((self.alpha * np.log(self.horizon)) / (2 * self.pulls)) 45 | indexes[self.pulls < 1] = float('+inf') 46 | self.index[:] = indexes 47 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBV.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB-V policy for bounded bandits, with a variance correction term. 3 | Reference: [Audibert, Munos, & Szepesvári - Theoret. Comput. Sci., 2009]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Olivier Cappé, Aurélien Garivier, Lilian Besson" 8 | __version__ = "0.5" 9 | 10 | from math import sqrt, log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .UCB import UCB 16 | except ImportError: 17 | from UCB import UCB 18 | 19 | 20 | class UCBV(UCB): 21 | """ The UCB-V policy for bounded bandits, with a variance correction term. 22 | Reference: [Audibert, Munos, & Szepesvári - Theoret. Comput. Sci., 2009]. 23 | """ 24 | def __str__(self): 25 | return "UCB-V" 26 | 27 | def __init__(self, nbArms, lower=0., amplitude=1.): 28 | super(UCBV, self).__init__(nbArms, lower=lower, amplitude=amplitude) 29 | self.rewardsSquared = np.zeros(self.nbArms) #: Keep track of squared of rewards, to compute an empirical variance 30 | 31 | def startGame(self): 32 | super(UCBV, self).startGame() 33 | self.rewardsSquared.fill(0) 34 | 35 | def getReward(self, arm, reward): 36 | """Give a reward: increase t, pulls, and update cumulated sum of rewards and of rewards squared for that arm (normalized in [0, 1]).""" 37 | super(UCBV, self).getReward(arm, reward) 38 | self.rewardsSquared[arm] += ((reward - self.lower) / self.amplitude) ** 2 39 | 40 | def computeIndex(self, arm): 41 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 42 | 43 | .. 
math:: 44 | 45 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 46 | V_k(t) &= \frac{Z_k(t)}{N_k(t)} - \hat{\mu}_k(t)^2, \\ 47 | I_k(t) &= \hat{\mu}_k(t) + \sqrt{\frac{2 \log(t) V_k(t)}{N_k(t)}} + 3 (b - a) \frac{\log(t)}{N_k(t)}. 48 | 49 | Where rewards are in :math:`[a, b]`, and :math:`V_k(t)` is an estimator of the variance of rewards, 50 | obtained from :math:`X_k(t) = \sum_{\sigma=1}^{t} 1(A(\sigma) = k) r_k(\sigma)` is the sum of rewards from arm k, 51 | and :math:`Z_k(t) = \sum_{\sigma=1}^{t} 1(A(\sigma) = k) r_k(\sigma)^2` is the sum of rewards *squared*. 52 | """ 53 | if self.pulls[arm] < 1: 54 | return float('+inf') 55 | else: 56 | mean = self.rewards[arm] / self.pulls[arm] # Mean estimate 57 | variance = (self.rewardsSquared[arm] / self.pulls[arm]) - mean ** 2 # Variance estimate 58 | return mean + sqrt(2.0 * log(self.t) * variance / self.pulls[arm]) + 3.0 * self.amplitude * log(self.t) / self.pulls[arm] 59 | 60 | def computeAllIndex(self): 61 | """ Compute the current indexes for all arms, in a vectorized manner.""" 62 | means = self.rewards / self.pulls # Mean estimate 63 | variances = (self.rewardsSquared / self.pulls) - means ** 2 # Variance estimate 64 | indexes = means + np.sqrt(2.0 * np.log(self.t) * variances / self.pulls) + 3.0 * self.amplitude * np.log(self.t) / self.pulls 65 | indexes[self.pulls < 1] = float('+inf') 66 | self.index[:] = indexes 67 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBVtuned.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCBV-Tuned policy for bounded bandits, with a tuned variance correction term. 3 | Reference: [Auer et al. 02]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Olivier Cappé, Aurélien Garivier, Lilian Besson" 8 | __version__ = "0.5" 9 | 10 | from math import sqrt, log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .UCBV import UCBV 16 | except ImportError: 17 | from UCBV import UCBV 18 | 19 | 20 | class UCBVtuned(UCBV): 21 | """ The UCBV-Tuned policy for bounded bandits, with a tuned variance correction term. 22 | Reference: [Auer et al. 02]. 23 | """ 24 | def __str__(self): 25 | return "UCB-V-Tuned" 26 | 27 | def computeIndex(self, arm): 28 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 29 | 30 | .. math:: 31 | 32 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 33 | V_k(t) &= \frac{Z_k(t)}{N_k(t)} - \hat{\mu}_k(t)^2, \\ 34 | V'_k(t) &= V_k(t) + \sqrt{\frac{2 \log(t)}{N_k(t)}}, \\ 35 | I_k(t) &= \hat{\mu}_k(t) + \sqrt{\frac{\log(t) V'_k(t)}{N_k(t)}}. 36 | 37 | Where :math:`V'_k(t)` is an other estimator of the variance of rewards, 38 | obtained from :math:`X_k(t) = \sum_{\sigma=1}^{t} 1(A(\sigma) = k) r_k(\sigma)` is the sum of rewards from arm k, 39 | and :math:`Z_k(t) = \sum_{\sigma=1}^{t} 1(A(\sigma) = k) r_k(\sigma)^2` is the sum of rewards *squared*. 
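For instance (illustrative numbers only): with :math:`t = 100`, :math:`N_k(t) = 10`, :math:`X_k(t) = 6` and :math:`Z_k(t) = 4`, one gets :math:`\hat{\mu}_k(t) = 0.6`, :math:`V_k(t) = 0.4 - 0.36 = 0.04`, :math:`V'_k(t) \approx 0.04 + 0.96 \approx 1.0`, and :math:`I_k(t) \approx 0.6 + \sqrt{\log(100) \times 1.0 / 10} \approx 1.28`.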
40 | """ 41 | if self.pulls[arm] < 1: 42 | return float('+inf') 43 | else: 44 | mean = self.rewards[arm] / self.pulls[arm] # Mean estimate 45 | variance = (self.rewardsSquared[arm] / self.pulls[arm]) - mean ** 2 # Variance estimate 46 | # Correct variance estimate 47 | variance += sqrt(2.0 * log(self.t) / self.pulls[arm]) 48 | return mean + sqrt(log(self.t) * variance / self.pulls[arm]) 49 | 50 | def computeAllIndex(self): 51 | """ Compute the current indexes for all arms, in a vectorized manner.""" 52 | means = self.rewards / self.pulls # Mean estimate 53 | variances = (self.rewardsSquared / self.pulls) - means ** 2 # Variance estimate 54 | variances += np.sqrt(2.0 * np.log(self.t) / self.pulls) 55 | indexes = means + np.sqrt(np.log(self.t) * variances / self.pulls) 56 | indexes[self.pulls < 1] = float('+inf') 57 | self.index[:] = indexes 58 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBalpha.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB1 (UCB-alpha) index policy, modified to take a random permutation order for the initial exploration of each arm (reduce collisions in the multi-players setting). 3 | Reference: [Auer et al. 02]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.9" 9 | 10 | from math import sqrt, log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .UCB import UCB 16 | except ImportError: 17 | from UCB import UCB 18 | 19 | #: Default parameter for alpha 20 | ALPHA = 1 21 | ALPHA = 4 22 | 23 | 24 | class UCBalpha(UCB): 25 | """ The UCB1 (UCB-alpha) index policy, modified to take a random permutation order for the initial exploration of each arm (reduce collisions in the multi-players setting). 26 | Reference: [Auer et al. 02]. 27 | """ 28 | 29 | def __init__(self, nbArms, alpha=ALPHA, lower=0., amplitude=1.): 30 | super(UCBalpha, self).__init__(nbArms, lower=lower, amplitude=amplitude) 31 | assert alpha >= 0, "Error: the alpha parameter for UCBalpha class has to be >= 0." # DEBUG 32 | self.alpha = alpha #: Parameter alpha 33 | 34 | def __str__(self): 35 | return r"UCB($\alpha={:.3g}$)".format(self.alpha) 36 | 37 | def computeIndex(self, arm): 38 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 39 | 40 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\frac{\alpha \log(t)}{2 N_k(t)}}. 41 | """ 42 | if self.pulls[arm] < 1: 43 | return float('+inf') 44 | else: 45 | return (self.rewards[arm] / self.pulls[arm]) + sqrt((self.alpha * log(self.t)) / (2 * self.pulls[arm])) 46 | 47 | def computeAllIndex(self): 48 | """ Compute the current indexes for all arms, in a vectorized manner.""" 49 | indexes = (self.rewards / self.pulls) + np.sqrt((self.alpha * np.log(self.t)) / (2 * self.pulls)) 50 | indexes[self.pulls < 1] = float('+inf') 51 | self.index[:] = indexes 52 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBmin.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCB-min policy for bounded bandits, with a :math:`\min\left(1, \sqrt{\frac{\log(t)}{2 N_k(t)}}\right)` term. 3 | Reference: [Anandkumar et al., 2010]. 
4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import sqrt, log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .UCB import UCB 16 | except ImportError: 17 | from UCB import UCB 18 | 19 | 20 | class UCBmin(UCB): 21 | r""" The UCB-min policy for bounded bandits, with a :math:`\min\left(1, \sqrt{\frac{\log(t)}{2 N_k(t)}}\right)` term. 22 | Reference: [Anandkumar et al., 2010]. 23 | """ 24 | 25 | def computeIndex(self, arm): 26 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 27 | 28 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \min\left(1, \sqrt{\frac{\log(t)}{2 N_k(t)}}\right). 29 | """ 30 | if self.pulls[arm] < 1: 31 | return float('+inf') 32 | else: 33 | return (self.rewards[arm] / self.pulls[arm]) + min(1., sqrt(log(self.t) / (2 * self.pulls[arm]))) 34 | 35 | def computeAllIndex(self): 36 | """ Compute the current indexes for all arms, in a vectorized manner.""" 37 | indexes = (self.rewards / self.pulls) + np.minimum(1., np.sqrt((2 * np.log10(self.t)) / self.pulls)) 38 | indexes[self.pulls < 1] = float('+inf') 39 | self.index[:] = indexes 40 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBplus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r""" The UCB+ policy for bounded bandits, with a small trick on the index. 3 | 4 | - Reference: [Auer et al. 2002], and [[Garivier et al. 2016](https://arxiv.org/pdf/1605.08988.pdf)] (it is noted :math:`\mathrm{UCB}^*` in the second article). 5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.9" 10 | 11 | from math import sqrt, log 12 | import numpy as np 13 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 14 | 15 | try: 16 | from .UCB import UCB 17 | except ImportError: 18 | from UCB import UCB 19 | 20 | 21 | class UCBplus(UCB): 22 | r""" The UCB+ policy for bounded bandits, with a small trick on the index. 23 | 24 | - Reference: [Auer et al. 2002], and [[Garivier et al. 2016](https://arxiv.org/pdf/1605.08988.pdf)] (it is noted :math:`\mathrm{UCB}^*` in the second article). 25 | """ 26 | 27 | def __str__(self): 28 | return "UCB+" 29 | 30 | def computeIndex(self, arm): 31 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 32 | 33 | .. math:: I_k(t) = \frac{X_k(t)}{N_k(t)} + \sqrt{\max\left(0, \frac{\log(t / N_k(t))}{2 N_k(t)}\right)}. 
34 | """ 35 | if self.pulls[arm] < 1: 36 | return float('+inf') 37 | else: 38 | return (self.rewards[arm] / self.pulls[arm]) + sqrt(max(0., log(self.t / (self.pulls[arm]))) / (2 * self.pulls[arm])) 39 | 40 | def computeAllIndex(self): 41 | """ Compute the current indexes for all arms, in a vectorized manner.""" 42 | indexes = (self.rewards / self.pulls) + np.sqrt(np.maximum(0., np.log(self.t / self.pulls)) / (2 * self.pulls)) # vectorized version of computeIndex, with the same log(t / N_k(t)) exploration term 43 | indexes[self.pulls < 1] = float('+inf') 44 | self.index[:] = indexes 45 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UCBrandomInit.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The UCB index policy, modified to take a random permutation order for the initial exploration of each arm (could reduce collisions in the multi-players setting). 3 | Reference: [Lai & Robbins, 1985]. 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.2" 9 | 10 | import numpy as np 11 | 12 | try: 13 | from .UCB import UCB 14 | except ImportError: 15 | from UCB import UCB 16 | 17 | 18 | class UCBrandomInit(UCB): 19 | """ The UCB index policy, modified to take a random permutation order for the initial exploration of each arm (could reduce collisions in the multi-players setting). 20 | Reference: [Lai & Robbins, 1985]. 21 | """ 22 | 23 | def __init__(self, nbArms, lower=0., amplitude=1.): 24 | super(UCBrandomInit, self).__init__(nbArms, lower=lower, amplitude=amplitude) 25 | # Randomize the order of the initial visit to each arm: a deterministic initial order breaks the ability to play efficiently in multi-players games 26 | self._initial_exploration = np.random.permutation(nbArms) 27 | # The proba that another player has the same is nbPlayers / factorial(nbArms) : should be SMALL ! 28 | # print("One UCB player with _initial_exploration =", self._initial_exploration) # DEBUG 29 | 30 | def choice(self): 31 | if self.t < self.nbArms: # Force to first visit each arm in a certain random order 32 | return self._initial_exploration[self.t] # Better: random permutation! 33 | else: 34 | return super(UCBrandomInit, self).choice() 35 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/Uniform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Uniform: the fully uniform policy who selects randomly (uniformly) an arm at each step (stupid). 3 | """ 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.1" 7 | 8 | import random 9 | 10 | try: 11 | from .BasePolicy import BasePolicy 12 | except ImportError: 13 | from BasePolicy import BasePolicy 14 | 15 | 16 | class Uniform(BasePolicy): 17 | """ Uniform: the fully uniform policy who selects randomly (uniformly) an arm at each step (stupid).
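    Example (a small illustrative check):

    >>> policy = Uniform(nbArms=5)
    >>> policy.startGame()
    >>> 0 <= policy.choice() <= 4
    True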
18 | """ 19 | 20 | def __init__(self, nbArms, lower=0., amplitude=1.): 21 | """Nothing to do.""" 22 | self.nbArms = nbArms #: Number of arms 23 | 24 | def __str__(self): 25 | return "U(1..{})".format(self.nbArms) 26 | 27 | def startGame(self): 28 | """Nothing to do.""" 29 | pass 30 | 31 | def getReward(self, arm, reward): 32 | """Nothing to do.""" 33 | pass 34 | 35 | def choice(self): 36 | """Uniform random choice between 0 and nbArms - 1 (included).""" 37 | return random.randint(0, self.nbArms - 1) 38 | 39 | def choiceWithRank(self, rank=1): 40 | """Ignore the rank!""" 41 | return self.choice() 42 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/UniformOnSome.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ UniformOnSome: a fully uniform policy who selects randomly (uniformly) an arm among a fix set, at each step (stupid). 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.1" 8 | 9 | import random 10 | 11 | try: 12 | from .Uniform import Uniform 13 | except ImportError: 14 | from Uniform import Uniform 15 | 16 | 17 | class UniformOnSome(Uniform): 18 | """ UniformOnSome: a fully uniform policy who selects randomly (uniformly) an arm among a fix set, at each step (stupid). 19 | """ 20 | 21 | def __init__(self, nbArms, armIndexes=None, lower=0., amplitude=1.): 22 | self.nbArms = nbArms #: Number of arms 23 | if armIndexes is None: 24 | armIndexes = list(range(nbArms)) 25 | self.armIndexes = armIndexes #: Arms from where to uniformly sample 26 | 27 | def __str__(self): 28 | return "UniformOnSome({})".format(self.armIndexes) 29 | 30 | def choice(self): 31 | """Uniform choice from armIndexes.""" 32 | return random.choice(self.armIndexes) 33 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/_test_for_BESA_core_function.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Test of the core function of BESA algorithm. 3 | 4 | $ ipython 5 | In [1]: run _test_for_BESA_core_function.py 6 | 7 | In [2]: %timeit manualbranching(random_samples(a, mu_a, N, 2 * N), random_samples(b, mu_b, N, 2 * N)) 8 | 46.3 µs ± 3.95 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each) 9 | 10 | In [3]: %timeit numpytest(random_samples(a, mu_a, N, 2 * N), random_samples(b, mu_b, N, 2 * N)) 11 | 61.9 µs ± 6.76 µs per loop (mean ± std. dev. 
of 7 runs, 10000 loops each) 12 | """ 13 | from __future__ import division, print_function # Python 2 compatibility 14 | 15 | __author__ = "Lilian Besson" 16 | __version__ = "0.9" 17 | 18 | import numpy as np 19 | import timeit 20 | 21 | 22 | def manualbranching(tuple_a, tuple_b): 23 | Na, mean_a, a = tuple_a 24 | Nb, mean_b, b = tuple_b 25 | if mean_a > mean_b: 26 | return a 27 | elif mean_a < mean_b: 28 | return b 29 | else: 30 | if Na < Nb: 31 | return a 32 | elif Na > Nb: 33 | return b 34 | else: # if no way of breaking the tie, choose uniformly at random 35 | return np.random.choice([a, b]) 36 | 37 | 38 | def numpytest(tuple_a, tuple_b): 39 | Na, mean_a, samples_a, a = tuple_a 40 | Nb, mean_b, samples_b, b = tuple_b 41 | if mean_a != mean_b: 42 | return [a, b][np.argmax([mean_a, mean_b])] 43 | else: 44 | return [a, b][np.argmin([Na, Nb])] 45 | 46 | 47 | def random_samples(i, mu, N1, N2): 48 | N1, N2 = min(N1, N2), max(N1, N2) 49 | N = np.random.randint(N1, high=N2) 50 | samples = np.asarray(np.random.binomial(1, mu, N), dtype=float) 51 | mean = np.mean(samples) 52 | return N, mean, samples, i 53 | 54 | 55 | def main(N=10, mu_a=0.5, mu_b=0.5): 56 | a, b = 0, 1 57 | print("For the function 'manualbranching' run:") 58 | print("%timeit manualbranching(random_samples(a, mu_a, N, 2 * N), random_samples(b, mu_b, N, 2 * N))") 59 | print("For the function 'numpytest' run:") 60 | print("%timeit numpytest(random_samples(a, mu_a, N, 2 * N), random_samples(b, mu_b, N, 2 * N))") 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/klUCBH.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The kl-UCB-H policy, for one-parameter exponential distributions. 3 | Reference: [Lai 87](https://projecteuclid.org/download/pdf_1/euclid.aos/1176350495) 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .kullback import klucbBern 16 | from .klUCB import klUCB, c 17 | except ImportError: 18 | from kullback import klucbBern 19 | from klUCB import klUCB, c 20 | 21 | 22 | class klUCBH(klUCB): 23 | """ The kl-UCB-H policy, for one-parameter exponential distributions. 24 | Reference: [Lai 87](https://projecteuclid.org/download/pdf_1/euclid.aos/1176350495) 25 | """ 26 | 27 | def __init__(self, nbArms, horizon=None, tolerance=1e-4, klucb=klucbBern, c=c, lower=0., amplitude=1.): 28 | super(klUCBH, self).__init__(nbArms, tolerance=tolerance, klucb=klucb, c=c, lower=lower, amplitude=amplitude) 29 | self.horizon = int(horizon) #: Parameter :math:`T` = known horizon of the experiment. 30 | 31 | def __str__(self): 32 | return r"kl-UCB-H($T={}$, {}{})".format(self.horizon, "" if self.c == 1 else r"$c={:.3g}$".format(self.c), self.klucb.__name__[5:]) 33 | 34 | def computeIndex(self, arm): 35 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 36 | 37 | .. math:: 38 | 39 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 40 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{c \log(T)}{N_k(t)} \right\},\\ 41 | I_k(t) &= U_k(t). 
42 | 43 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 44 | and c is the parameter (default to 1). 45 | """ 46 | if self.pulls[arm] < 1: 47 | return float('+inf') 48 | else: 49 | # XXX We could adapt tolerance to the value of self.t 50 | return self.klucb(self.rewards[arm] / self.pulls[arm], self.c * log(self.horizon) / self.pulls[arm], self.tolerance) 51 | 52 | def computeAllIndex(self): 53 | """ Compute the current indexes for all arms, in a vectorized manner.""" 54 | indexes = self.klucb_vect(self.rewards / self.pulls, self.c * np.log(self.horizon) / self.pulls, self.tolerance) 55 | indexes[self.pulls < 1] = float('+inf') 56 | self.index[:] = indexes 57 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/klUCBHPlus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The improved kl-UCB-H+ policy, for one-parameter exponential distributions. 3 | Reference: [Lai 87](https://projecteuclid.org/download/pdf_1/euclid.aos/1176350495) 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .kullback import klucbBern 16 | from .klUCB import klUCB, c 17 | except ImportError: 18 | from kullback import klucbBern 19 | from klUCB import klUCB, c 20 | 21 | 22 | class klUCBHPlus(klUCB): 23 | """ The improved kl-UCB-H+ policy, for one-parameter exponential distributions. 24 | Reference: [Lai 87](https://projecteuclid.org/download/pdf_1/euclid.aos/1176350495) 25 | """ 26 | 27 | def __init__(self, nbArms, horizon=None, tolerance=1e-4, klucb=klucbBern, c=c, lower=0., amplitude=1.): 28 | super(klUCBHPlus, self).__init__(nbArms, tolerance=tolerance, klucb=klucb, c=c, lower=lower, amplitude=amplitude) 29 | self.horizon = int(horizon) #: Parameter :math:`T` = known horizon of the experiment. 30 | 31 | def __str__(self): 32 | return r"kl-UCB-H+($T={}$, {}{})".format(self.horizon, "" if self.c == 1 else r"$c={:.3g}$".format(self.c), self.klucb.__name__[5:]) 33 | 34 | def computeIndex(self, arm): 35 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 36 | 37 | .. math:: 38 | 39 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 40 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{c \log(T / N_k(t))}{N_k(t)} \right\},\\ 41 | I_k(t) &= U_k(t). 42 | 43 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 44 | and c is the parameter (default to 1). 
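        A small illustrative sketch of how such an upper-confidence value can be computed in practice, assuming the pure-Python helper :func:`Policies.kullback.klucbBern` (a dichotomic search on :math:`q`)::

            from SMPyBandits.Policies.kullback import klucbBern
            mean = 0.3                   # empirical mean of the arm, hat{mu}_k(t)
            budget = 0.2                 # exploration budget, c * log(T / N_k(t)) / N_k(t)
            U = klucbBern(mean, budget)  # largest q in [mean, 1] with kl(mean, q) <= budget
            assert mean < U <= 1.0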
45 | """ 46 | if self.pulls[arm] < 1: 47 | return float('+inf') 48 | else: 49 | # XXX We could adapt tolerance to the value of self.t 50 | return self.klucb(self.rewards[arm] / self.pulls[arm], self.c * log(self.horizon / self.pulls[arm]) / self.pulls[arm], self.tolerance) 51 | 52 | def computeAllIndex(self): 53 | """ Compute the current indexes for all arms, in a vectorized manner.""" 54 | indexes = self.klucb_vect(self.rewards / self.pulls, self.c * np.log(self.horizon / self.pulls) / self.pulls, self.tolerance) 55 | indexes[self.pulls < 1] = float('+inf') 56 | self.index[:] = indexes 57 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/klUCBPlus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The improved kl-UCB policy, for one-parameter exponential distributions. 3 | Reference: [Cappé et al. 13](https://arxiv.org/pdf/1210.1136.pdf) 4 | """ 5 | from __future__ import division, print_function # Python 2 compatibility 6 | 7 | __author__ = "Lilian Besson" 8 | __version__ = "0.1" 9 | 10 | from math import log 11 | import numpy as np 12 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 13 | 14 | try: 15 | from .klUCB import klUCB 16 | except ImportError: 17 | from klUCB import klUCB 18 | 19 | 20 | class klUCBPlus(klUCB): 21 | """ The improved kl-UCB policy, for one-parameter exponential distributions. 22 | Reference: [Cappé et al. 13](https://arxiv.org/pdf/1210.1136.pdf) 23 | """ 24 | 25 | def __str__(self): 26 | name = self.klucb.__name__[5:] 27 | if name == "Bern": name = "" 28 | complement = "{}{}".format(name, "" if self.c == 1 else r"$c={:.3g}$".format(self.c)) 29 | if complement != "": complement = "({})".format(complement) 30 | return r"kl-UCB$^+${}".format(complement) 31 | 32 | def computeIndex(self, arm): 33 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 34 | 35 | .. math:: 36 | 37 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 38 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{c \log(t / N_k(t))}{N_k(t)} \right\},\\ 39 | I_k(t) &= U_k(t). 40 | 41 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 42 | and c is the parameter (default to 1). 43 | """ 44 | if self.pulls[arm] < 1: 45 | return float('+inf') 46 | else: 47 | # XXX We could adapt tolerance to the value of self.t 48 | return self.klucb(self.rewards[arm] / self.pulls[arm], self.c * log(self.t / self.pulls[arm]) / self.pulls[arm], self.tolerance) 49 | 50 | def computeAllIndex(self): 51 | """ Compute the current indexes for all arms, in a vectorized manner.""" 52 | indexes = self.klucb_vect(self.rewards / self.pulls, self.c * np.log(self.t / self.pulls) / self.pulls, self.tolerance) 53 | indexes[self.pulls < 1] = float('+inf') 54 | self.index[:] = indexes 55 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/klUCBloglog.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ The generic kl-UCB policy for one-parameter exponential distributions. 3 | By default, it assumes Bernoulli arms. 4 | Note: using log(t) + c log(log(t)) for the KL-UCB index of just log(t) 5 | Reference: [Garivier & Cappé - COLT, 2011]. 
6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | from math import log 13 | import numpy as np 14 | np.seterr(divide='ignore') # XXX dangerous in general, controlled here! 15 | 16 | try: 17 | from .klUCB import klUCB 18 | except ImportError: 19 | from klUCB import klUCB 20 | 21 | #: Default value for the constant c used in the computation of KL-UCB index. 22 | c = 3 #: default value, as it was in pymaBandits v1.0 23 | # c = 1 #: as suggested in the Theorem 1 in https://arxiv.org/pdf/1102.2490.pdf 24 | 25 | 26 | class klUCBloglog(klUCB): 27 | """ The generic kl-UCB policy for one-parameter exponential distributions. 28 | By default, it assumes Bernoulli arms. 29 | Note: using log(t) + c log(log(t)) for the KL-UCB index of just log(t) 30 | Reference: [Garivier & Cappé - COLT, 2011]. 31 | """ 32 | 33 | # def __init__(self, nbArms, tolerance=TOLERANCE, klucb=klucbBern, c=c, lower=0., amplitude=1.): 34 | # super(klUCB, self).__init__(nbArms, lower=lower, amplitude=amplitude) 35 | # self.c = c #: Parameter c 36 | # self.klucb = np.vectorize(klucb) #: kl function to use 37 | # self.klucb.__name__ = klucb.__name__ 38 | # self.tolerance = tolerance #: Numerical tolerance 39 | 40 | def __str__(self): 41 | name = self.klucb.__name__[5:] 42 | if name == "Bern": name = "" 43 | complement = "{}{}".format(name, "" if self.c == 3 else r"$c={:.3g}$".format(self.c)) 44 | if complement != "": complement = "({})".format(complement) 45 | return r"kl-UCB{}".format(complement) 46 | 47 | def computeIndex(self, arm): 48 | r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k: 49 | 50 | .. math:: 51 | 52 | \hat{\mu}_k(t) &= \frac{X_k(t)}{N_k(t)}, \\ 53 | U_k(t) &= \sup\limits_{q \in [a, b]} \left\{ q : \mathrm{kl}(\hat{\mu}_k(t), q) \leq \frac{\log(t) + c \log(\max(1, \log(t)))}{N_k(t)} \right\},\\ 54 | I_k(t) &= U_k(t). 55 | 56 | If rewards are in :math:`[a, b]` (default to :math:`[0, 1]`) and :math:`\mathrm{kl}(x, y)` is the Kullback-Leibler divergence between two distributions of means x and y (see :mod:`Arms.kullback`), 57 | and c is the parameter (default to 1). 58 | """ 59 | if self.pulls[arm] < 1: 60 | return float('+inf') 61 | else: 62 | # XXX We could adapt tolerance to the value of self.t 63 | return self.klucb(self.rewards[arm] / self.pulls[arm], (log(self.t) + self.c * log(max(1, log(self.t)))) / self.pulls[arm], self.tolerance) 64 | 65 | def computeAllIndex(self): 66 | """ Compute the current indexes for all arms, in a vectorized manner.""" 67 | indexes = self.klucb_vect(self.rewards / self.pulls, (np.log(self.t) + self.c * np.log(np.maximum(1., np.log(self.t)))) / self.pulls, self.tolerance) 68 | indexes[self.pulls < 1] = float('+inf') 69 | self.index[:] = indexes 70 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Basic setup.py to compile a Cython extension. 
4 | It is used to compile the ``kullback_cython`` extension, by running:: 5 | 6 | $ python setup.py build_ext --inplace 7 | 8 | You can also use [pyximport](http://docs.cython.org/en/latest/src/tutorial/cython_tutorial.html#pyximport-cython-compilation-for-developers) to import the ``kullback_cython`` module transparently: 9 | 10 | >>> import pyximport; pyximport.install() 11 | >>> import kullback_cython as kullback 12 | >>> # then use kullback.klucbBern or others, as if they came from the pure Python version! 13 | """ 14 | from distutils.core import setup 15 | from distutils.extension import Extension 16 | from Cython.Build import cythonize 17 | 18 | extensions = [ 19 | # Extension("kullback_cython", ["kullback_cython.pyx"]), 20 | # XXX also build the extension with full name? 21 | Extension("SMPyBandits.Policies.kullback_cython", ["kullback_cython.pyx"]), 22 | ] 23 | 24 | setup( 25 | ext_modules = cythonize(extensions, compiler_directives={ 26 | 'embedsignature': True, 27 | 'language_level': 3, 28 | 'warn.undeclared': True, 29 | 'warn.unreachable': True, 30 | 'warn.maybe_uninitialized': True, 31 | 'warn.unused': True, 32 | 'warn.unused_arg': True, 33 | 'warn.unused_result': True, 34 | 'warn.multiple_declarators': True, 35 | }) 36 | ) 37 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/usenumba.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Import numba.jit or a dummy decorator. 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.6" 8 | 9 | #: Configure the use of numba 10 | USE_NUMBA = False 11 | USE_NUMBA = True # XXX Experimental 12 | 13 | if not USE_NUMBA: 14 | print("Warning: numba.jit seems to be disabled. Using a dummy decorator for numba.jit() ...") # DEBUG 15 | 16 | # DONE I tried numba.jit() on these functions, and it DOES not give any speedup...:-( sad sad ! 17 | try: 18 | from numba.decorators import jit 19 | import locale # See this bug, http://numba.pydata.org/numba-doc/dev/user/faq.html#llvm-locale-bug 20 | locale.setlocale(locale.LC_NUMERIC, 'C') 21 | # print("Info: numba.jit seems to be available.") # DEBUG 22 | except ImportError: 23 | # print("Warning: numba.jit seems to not be available. Using a dummy decorator for numba.jit() ...\nIf you want the speed up brought by numba.jit, try to manually install numba and check that it works (installing llvmlite can be tricky, cf. https://github.com/numba/numba#custom-python-environments") # DEBUG 24 | USE_NUMBA = False 25 | 26 | if not USE_NUMBA: 27 | from functools import wraps 28 | 29 | def jit(f): 30 | """Fake numba.jit decorator.""" 31 | return f # XXX isn't it enough?! 32 | # @wraps(f) 33 | # def wrapper(*args, **kwargs): 34 | # """Fake docstring, shouldn't be used thanks to wraps.""" 35 | # return f(*args, **kwargs) 36 | # return wrapper 37 | 38 | 39 | # Only export and expose the useful functions defined here 40 | __all__ = ["USE_NUMBA", "jit"] 41 | -------------------------------------------------------------------------------- /SMPyBandits/Policies/with_proba.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Simply defines a function :func:`with_proba` that is used everywhere. 
3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.9" 8 | 9 | from random import random 10 | 11 | 12 | # --- Utility functions 13 | 14 | 15 | def with_proba(epsilon): 16 | r"""Bernoulli test, with probability :math:`\varepsilon`, return `True`, and with probability :math:`1 - \varepsilon`, return `False`. 17 | 18 | Example: 19 | 20 | >>> from random import seed; seed(0) # reproductible 21 | >>> with_proba(0.5) 22 | False 23 | >>> with_proba(0.9) 24 | True 25 | >>> with_proba(0.1) 26 | False 27 | >>> if with_proba(0.2): 28 | ... print("This happens 20% of the time.") 29 | """ 30 | assert 0 <= epsilon <= 1, "Error: for 'with_proba(epsilon)', epsilon = {:.3g} has to be between 0 and 1 to be a valid probability.".format(epsilon) # DEBUG 31 | return random() < epsilon # True with proba epsilon 32 | 33 | 34 | # --- Debugging 35 | 36 | if __name__ == "__main__": 37 | # Code for debugging purposes. 38 | from doctest import testmod 39 | print("\nTesting automatically all the docstring written in each functions of this module :") 40 | testmod(verbose=True) 41 | -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/BaseCentralizedPolicy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Base class for any centralized policy, for the multi-players setting.""" 3 | from __future__ import division, print_function # Python 2 compatibility 4 | 5 | __author__ = "Lilian Besson" 6 | __version__ = "0.6" 7 | 8 | 9 | class BaseCentralizedPolicy(object): 10 | """ Base class for any centralized policy, for the multi-players setting.""" 11 | 12 | def __init__(self, nbArms): 13 | """ New policy""" 14 | self.nbArms = nbArms 15 | 16 | def __str__(self): 17 | return self.__class__.__name__ 18 | 19 | def startGame(self): 20 | """ Start the simulation.""" 21 | raise NotImplementedError("This method startGame() has to be implemented in the child class inheriting from BaseCentralizedPolicy.") 22 | 23 | def getReward(self, arm, reward): 24 | """ Get a reward from that arm.""" 25 | raise NotImplementedError("This method getReward(arm, reward) has to be implemented in the child class inheriting from BaseCentralizedPolicy.") 26 | 27 | def choice(self): 28 | """ Choose an arm.""" 29 | raise NotImplementedError("This method choice() has to be implemented in the child class inheriting from BaseCentralizedPolicy.") 30 | -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/BaseMPPolicy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Base class for any multi-players policy. 3 | 4 | - If rewards are not in [0, 1], be sure to give the lower value and the amplitude. Eg, if rewards are in [-3, 3], lower = -3, amplitude = 6. 
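- In other words, ``lower`` and ``amplitude`` describe the affine rescaling :math:`r \mapsto (r - \mathrm{lower}) / \mathrm{amplitude}` used to map a raw reward back to :math:`[0, 1]`: with the example above, a reward of 3 is mapped to :math:`(3 - (-3)) / 6 = 1`.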
5 | """ 6 | from __future__ import division, print_function # Python 2 compatibility 7 | 8 | __author__ = "Lilian Besson" 9 | __version__ = "0.3" 10 | 11 | 12 | class BaseMPPolicy(object): 13 | """ Base class for any multi-players policy.""" 14 | 15 | def __init__(self): 16 | """New policy""" 17 | pass 18 | 19 | def __str__(self): 20 | return self.__class__.__name__ 21 | 22 | # --- Proxy methods 23 | 24 | def _startGame_one(self, playerId): 25 | """Forward the call to self._players[playerId].""" 26 | return self._players[playerId].startGame() 27 | 28 | def _getReward_one(self, playerId, arm, reward): 29 | """Forward the call to self._players[playerId].""" 30 | return self._players[playerId].getReward(arm, reward) 31 | 32 | def _choice_one(self, playerId): 33 | """Forward the call to self._players[playerId].""" 34 | return self._players[playerId].choice() 35 | 36 | def _choiceWithRank_one(self, playerId, rank=1): 37 | """Forward the call to self._players[playerId].""" 38 | return self._players[playerId].choiceWithRank(rank) 39 | 40 | def _choiceFromSubSet_one(self, playerId, availableArms='all'): 41 | """Forward the call to self._players[playerId].""" 42 | return self._players[playerId].choiceFromSubSet(availableArms) 43 | 44 | def _choiceMultiple_one(self, playerId, nb=1): 45 | """Forward the call to self._players[playerId].""" 46 | return self._players[playerId].choiceMultiple(nb) 47 | 48 | def _choiceIMP_one(self, playerId, nb=1): 49 | """Forward the call to self._players[playerId].""" 50 | return self._players[playerId].choiceIMP(nb) 51 | 52 | def _estimatedOrder_one(self, playerId): 53 | """Forward the call to self._players[playerId].""" 54 | return self._players[playerId].estimatedOrder() 55 | 56 | def _estimatedBestArms_one(self, playerId, M=1): 57 | """Forward the call to self._players[playerId].""" 58 | return self._players[playerId].estimatedBestArms(M=M) 59 | -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/CentralizedIMP.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ CentralizedIMP: a multi-player policy where ONE policy is used by a centralized agent; asking the policy to select nbPlayers arms at each step, using an hybrid strategy: choose nb-1 arms with maximal empirical averages, then 1 arm with maximal index. Cf. algorithm IMP-TS [Komiyama, Honda, Nakagawa, 2016, arXiv 1506.00779]. 3 | """ 4 | from __future__ import division, print_function # Python 2 compatibility 5 | 6 | __author__ = "Lilian Besson" 7 | __version__ = "0.2" 8 | 9 | import numpy as np 10 | 11 | from .CentralizedMultiplePlay import CentralizedMultiplePlay 12 | 13 | 14 | # --- Class for the mother 15 | 16 | class CentralizedIMP(CentralizedMultiplePlay): 17 | """ CentralizedIMP: a multi-player policy where ONE policy is used by a centralized agent; asking the policy to select nbPlayers arms at each step, using an hybrid strategy: choose nb-1 arms with maximal empirical averages, then 1 arm with maximal index. Cf. algorithm IMP-TS [Komiyama, Honda, Nakagawa, 2016, arXiv 1506.00779]. 
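    A rough sketch of one IMP selection step (illustrative only, with hypothetical arrays ``means`` and ``indexes`` of shape ``(nbArms,)``)::

        import numpy as np
        best_by_mean = np.argsort(means)[-(nbPlayers - 1):]       # nbPlayers - 1 arms with the largest empirical means
        rest = [k for k in range(len(means)) if k not in best_by_mean]
        best_by_index = max(rest, key=lambda k: indexes[k])       # plus 1 arm with the largest index
        choices = list(best_by_mean) + [best_by_index]            # the nbPlayers arms played this round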
18 | """ 19 | 20 | def _choice_one(self, playerId): 21 | """Use `choiceIMP` for each player.""" 22 | if playerId == 0: # For the first player, run the method 23 | # FIXED sort it then apply affectation_order, to fix its order ==> will have a fixed nb of switches for CentralizedMultiplePlay 24 | if self.uniformAllocation: 25 | self.choices = self.player.choiceIMP(self.nbPlayers) 26 | else: 27 | self.choices = np.sort(self.player.choiceIMP(self.nbPlayers))[self.affectation_order] # XXX Increasing order... 28 | # self.choices = np.sort(self.player.choiceMultiple(self.nbPlayers))[self.affectation_order][::-1] # XXX Decreasing order... 29 | # print("At time t = {} the {} centralized policy chosed arms = {} ...".format(self.player.t, self, self.choices)) # DEBUG 30 | # For the all players, use the pre-computed result 31 | return self.choices[playerId] 32 | -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/README.md: -------------------------------------------------------------------------------- 1 | # [Multi-Player policies](https://smpybandits.github.io/docs/PoliciesMultiPlayers.html) 2 | > See here the documentation: [docs/PoliciesMultiPlayers](https://smpybandits.github.io/docs/PoliciesMultiPlayers.html) 3 | 4 | 5 | ## List of Policies 6 | `PoliciesMultiPlayers` folder : contains various collision-avoidance protocol for the multi-players setting. 7 | 8 | - [`Selfish`](Selfish.py): a multi-player policy where every player is selfish, they do not try to handle the collisions. 9 | 10 | - [`CentralizedNotFair`](CentralizedNotFair.py): a multi-player policy which uses a centralize intelligence to affect users to a FIXED arm. 11 | - [`CentralizedFair`](CentralizedFair.py): a multi-player policy which uses a centralize intelligence to affect users an offset, each one take an orthogonal arm based on (offset + t) % nbArms. 12 | 13 | - [`CentralizedMultiplePlay`](CentralizedMultiplePlay.py) and [`CentralizedIMP`](CentralizedIMP.py): multi-player policies that use centralized but non-omniscient learning to select K = nbPlayers arms at each time step. 14 | 15 | - [`OracleNotFair`](OracleNotFair.py): a multi-player policy with full knowledge and centralized intelligence to affect users to a FIXED arm, among the best arms. 16 | - [`OracleFair`](OracleFair.py): a multi-player policy which uses a centralized intelligence to affect users an offset, each one take an orthogonal arm based on (offset + t) % nbBestArms, among the best arms. 17 | 18 | - [`rhoRand`](rhoRand.py), [`ALOHA`](ALOHA.py): implementation of generic collision avoidance algorithms, relying on a single-player bandit policy (eg. [`UCB`](UCB.py), [`Thompson`](Thompson.py) etc). And variants, [`rhoRandRand`](rhoRandRand.py), [`rhoRandSticky`](rhoRandSticky.py), [`rhoRandRotating`](rhoRandRotating.py), [`rhoRandEst`](rhoRandEst.py), [`rhoLearn`](rhoLearn.py), [`rhoLearnEst`](rhoLearnEst.py), [`rhoLearnExp3`](rhoLearnExp3.py), [`rhoRandALOHA`](rhoRandALOHA.py), 19 | - [`rhoCentralized`](rhoCentralized.py) is a semi-centralized version where orthogonal ranks 1..M are given to the players, instead of just giving them the value of M, but a decentralized learning policy is still used to learn the best arms. 20 | - [`RandTopM`](RandTopM.py) is another approach, similar to [`rhoRandSticky`](rhoRandSticky.py) and [`MusicalChair`](MusicalChair.py), but we hope it will be better, and we succeed in analyzing more easily. 
21 | 22 | ## API 23 | All policies have the same interface, as described in [`BaseMPPolicy`](BaseMPPolicy.py) for decentralized policies, 24 | and [`BaseCentralizedPolicy`](BaseCentralizedPolicy.py) for centralized policies, 25 | in order to use them in any experiment with the following approach: 26 | 27 | ```python 28 | my_policy_MP = Policy_MP(nbPlayers, nbArms) 29 | children = my_policy_MP.children # get a list of usable single-player policies 30 | for one_policy in children: 31 | one_policy.startGame() # start the game 32 | for t in range(T): 33 | for i in range(nbPlayers): 34 | k_t[i] = children[i].choice() # chose one arm, for each player 35 | for k in range(nbArms): 36 | players_who_played_k = [ k_t[i] for i in range(nbPlayers) if k_t[i] == k ] 37 | reward = reward_t[k] = sampled from the arm k # sample a reward 38 | if len(players_who_played_k) > 1: 39 | reward = 0 40 | for i in players_who_played_k: 41 | children[i].getReward(k, reward) 42 | ``` -------------------------------------------------------------------------------- /SMPyBandits/PoliciesMultiPlayers/with_proba.py: -------------------------------------------------------------------------------- 1 | ../Policies/with_proba.py -------------------------------------------------------------------------------- /SMPyBandits/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Open-Source Python package for Single- and Multi-Players multi-armed Bandits algorithms. 5 | 6 | - Homepage: https://SMPyBandits.GitHub.io/ 7 | - Author: Lilian Besson and contributors 8 | - License: MIT 9 | - Date: October 2019 10 | """ 11 | from __future__ import division, print_function # Python 2 compatibility 12 | 13 | __author__ = "Lilian Besson" 14 | __version__ = "0.9.7" 15 | 16 | try: 17 | # from .Arms import * 18 | from SMPyBandits import Arms 19 | except ImportError: 20 | pass 21 | 22 | try: 23 | # from .Environment import * 24 | from SMPyBandits import Environment 25 | except ImportError: 26 | pass 27 | 28 | try: 29 | # from .Policies import * 30 | from SMPyBandits import Policies 31 | except ImportError: 32 | pass 33 | 34 | # try: 35 | # # from .Policies.Posterior import * 36 | # from SMPyBandits.Policies import Posterior 37 | # except ImportError: 38 | # pass 39 | 40 | try: 41 | # from .PoliciesMultiPlayers import * 42 | from SMPyBandits import PoliciesMultiPlayers 43 | except ImportError: 44 | pass 45 | -------------------------------------------------------------------------------- /SMPyBandits/example_of_main_singleplayer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | An example of a simple 'main' script. 5 | Main scripts load the config, run the simulations, and plot them, for the single player case. 
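For instance (the optional command-line argument selects which configuration module is loaded, see the import logic below)::

    $ python example_of_main_singleplayer.py                            # uses example_of_configuration_singleplayer
    $ python example_of_main_singleplayer.py very_simple_configuration  # uses very_simple_configuration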
6 | """ 7 | from __future__ import division, print_function # Python 2 compatibility 8 | 9 | __author__ = "Lilian Besson" 10 | __version__ = "0.9" 11 | 12 | import sys 13 | if __name__ != '__main__': 14 | sys.exit(0) 15 | 16 | from Environment import Evaluator, notify 17 | 18 | if 'very_simple_configuration' in sys.argv or 'very_simple_configuration.py' in sys.argv: 19 | from very_simple_configuration import configuration 20 | else: 21 | from example_of_configuration_singleplayer import configuration 22 | 23 | configuration['showplot'] = True 24 | 25 | evaluation = Evaluator(configuration) 26 | 27 | # Start the evaluation and then print final ranking and plot, for each environment 28 | for envId, env in enumerate(evaluation.envs): 29 | # Evaluate just that env 30 | evaluation.startOneEnv(envId, env) 31 | 32 | # Compare them 33 | for envId, env in enumerate(evaluation.envs): 34 | evaluation.plotHistoryOfMeans(envId) # XXX To plot without saving 35 | 36 | print("\nGiving all the vector of final regrets ...") 37 | evaluation.printLastRegrets(envId) 38 | print("\nGiving the final ranking ...") 39 | evaluation.printFinalRanking(envId) 40 | 41 | print("\n\n- Plotting the last regrets...") 42 | evaluation.plotLastRegrets(envId, boxplot=True) 43 | 44 | print("\nGiving the mean and std running times ...") 45 | evaluation.printRunningTimes(envId) 46 | evaluation.plotRunningTimes(envId) 47 | 48 | print("\nGiving the mean and std running times ...") 49 | evaluation.printMemoryConsumption(envId) 50 | evaluation.plotMemoryConsumption(envId) 51 | 52 | print("\n\n- Plotting the mean reward...") 53 | evaluation.plotRegrets(envId, meanReward=True) 54 | 55 | print("\n\n- Plotting the regret...") 56 | evaluation.plotRegrets(envId) 57 | 58 | print("\n- Plotting the probability of picking the best arm of time...") 59 | evaluation.plotBestArmPulls(envId) 60 | 61 | print("\n- Plotting the histograms of regrets...") 62 | evaluation.plotLastRegrets(envId, sharex=True, sharey=True) 63 | 64 | # Done 65 | print("Done for simulations example_of_main_singleplayer ...") 66 | notify("Done for simulations example_of_main_singleplayer ...") 67 | -------------------------------------------------------------------------------- /SMPyBandits/include/README.md: -------------------------------------------------------------------------------- 1 | # Include 2 | - Just [docopt-cpp](https://github.com/docopt/docopt.cpp) -------------------------------------------------------------------------------- /SMPyBandits/include/docopt_util.h: -------------------------------------------------------------------------------- 1 | // 2 | // docopt_util.h 3 | // docopt 4 | // 5 | // Created by Jared Grubb on 2013-11-04. 6 | // Copyright (c) 2013 Jared Grubb. All rights reserved. 
7 | // 8 | 9 | #ifndef docopt_docopt_util_h 10 | #define docopt_docopt_util_h 11 | 12 | #if DOCTOPT_USE_BOOST_REGEX 13 | #include 14 | namespace std { 15 | using boost::regex; 16 | using boost::sregex_token_iterator; 17 | } 18 | #else 19 | #include 20 | #endif 21 | 22 | #pragma mark - 23 | #pragma mark General utility 24 | 25 | namespace { 26 | bool starts_with(std::string const& str, std::string const& prefix) 27 | { 28 | if (str.length() < prefix.length()) 29 | return false; 30 | return std::equal(prefix.begin(), prefix.end(), 31 | str.begin()); 32 | } 33 | 34 | std::string trim(std::string&& str, 35 | const std::string& whitespace = " \t\n") 36 | { 37 | const auto strEnd = str.find_last_not_of(whitespace); 38 | if (strEnd==std::string::npos) 39 | return {}; // no content 40 | str.erase(strEnd+1); 41 | 42 | const auto strBegin = str.find_first_not_of(whitespace); 43 | str.erase(0, strBegin); 44 | 45 | return std::move(str); 46 | } 47 | 48 | std::vector split(std::string const& str, size_t pos = 0) 49 | { 50 | const char* const anySpace = " \t\r\n\v\f"; 51 | 52 | std::vector ret; 53 | while (pos != std::string::npos) { 54 | auto start = str.find_first_not_of(anySpace, pos); 55 | if (start == std::string::npos) break; 56 | 57 | auto end = str.find_first_of(anySpace, start); 58 | auto size = end==std::string::npos ? end : end-start; 59 | ret.emplace_back(str.substr(start, size)); 60 | 61 | pos = end; 62 | } 63 | 64 | return ret; 65 | } 66 | 67 | std::tuple partition(std::string str, std::string const& point) 68 | { 69 | std::tuple ret; 70 | 71 | auto i = str.find(point); 72 | 73 | if (i == std::string::npos) { 74 | // no match: string goes in 0th spot only 75 | } else { 76 | std::get<2>(ret) = str.substr(i + point.size()); 77 | std::get<1>(ret) = point; 78 | str.resize(i); 79 | } 80 | std::get<0>(ret) = std::move(str); 81 | 82 | return ret; 83 | } 84 | 85 | template 86 | std::string join(I iter, I end, std::string const& delim) { 87 | if (iter==end) 88 | return {}; 89 | 90 | std::string ret = *iter; 91 | for(++iter; iter!=end; ++iter) { 92 | ret.append(delim); 93 | ret.append(*iter); 94 | } 95 | return ret; 96 | } 97 | 98 | std::vector regex_split(std::string const& text, std::regex const& re) 99 | { 100 | std::vector ret; 101 | for (auto it = std::sregex_token_iterator(text.begin(), text.end(), re, -1); 102 | it != std::sregex_token_iterator(); 103 | ++it) { 104 | ret.emplace_back(*it); 105 | } 106 | return ret; 107 | } 108 | } 109 | 110 | namespace docopt { 111 | template 112 | inline void hash_combine(std::size_t& seed, T const& v) 113 | { 114 | // stolen from boost::hash_combine 115 | std::hash hasher; 116 | seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); 117 | } 118 | } 119 | 120 | #endif 121 | -------------------------------------------------------------------------------- /SMPyBandits/very_simple_configuration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | An very simple configuration file to run some basic simulations about stationary multi-armed bandits. 
4 | """ 5 | 6 | from Arms import * 7 | 8 | from Environment import MAB 9 | 10 | from Policies import * 11 | 12 | # --- Parameters of the experiments 13 | HORIZON = 30 14 | 15 | REPETITIONS = 1 16 | 17 | NB_ARMS = 5 18 | 19 | ARM_TYPE = Bernoulli 20 | 21 | # Like http://localhost/publis/tiny-d3-bandit-animation.git/index.html?T=30&MU=0.1,0.2,0.3,0.4,0.9 22 | MEANS = [0.1, 0.2, 0.3, 0.4, 0.9] 23 | 24 | 25 | #: This dictionary configures the experiments 26 | configuration = { 27 | # --- Duration of the experiment 28 | "horizon": HORIZON, 29 | # --- Number of repetition of the experiment (to have an average) 30 | "repetitions": REPETITIONS, 31 | # --- Parameters for the use of joblib.Parallel 32 | "n_jobs": 1, # = nb of CPU cores 33 | "verbosity": 6, # Max joblib verbosity 34 | # --- Other parameters for the Evaluator 35 | "finalRanksOnAverage": True, # Use an average instead of the last value for the final ranking of the tested players 36 | "averageOn": 1e-3, # Average the final rank on the 1.% last time steps 37 | # --- Should we plot the lower-bounds or not? 38 | "plot_lowerbounds": False, # XXX Default 39 | # --- Arms 40 | "environment": [ 41 | { # Use vector from command line 42 | "arm_type": ARM_TYPE, 43 | "params": MEANS 44 | }, 45 | ], 46 | } 47 | 48 | configuration.update({ 49 | "policies": [ 50 | # --- Full or partial knowledge algorithms 51 | { "archtype": TakeFixedArm, "params": { "armIndex": 0 }}, # Take worse arm! 52 | { "archtype": TakeFixedArm, "params": { "armIndex": 1 }}, # Take second worse arm! 53 | { "archtype": TakeFixedArm, "params": { "armIndex": 2 }}, # Take third worse arm! 54 | { "archtype": TakeFixedArm, "params": { "armIndex": 3 }}, # Take forth worse arm! 55 | { "archtype": TakeFixedArm, "params": { "armIndex": 4 }}, # Take fifth worse arm! 
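        # NB: since MEANS = [0.1, 0.2, 0.3, 0.4, 0.9] is sorted in increasing order, armIndex=4 is in fact the best arm, so this last baseline plays the optimal arm at every step.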
56 | # --- Stupid algorithms 57 | { 58 | "archtype": Uniform, # The stupidest policy, fully uniform 59 | "params": {} 60 | }, 61 | # --- UCB algorithm 62 | { 63 | "archtype": UCB, # UCB with alpha=1 parameter 64 | "params": {} 65 | }, 66 | # --- Thompson algorithm 67 | { 68 | "archtype": Thompson, 69 | "params": {} 70 | }, 71 | # --- KL UCB algorithm 72 | { 73 | "archtype": klUCB, 74 | "params": {} 75 | }, 76 | # --- BESA algorithm 77 | { 78 | "archtype": BESA, 79 | "params": { 80 | "horizon": HORIZON, 81 | } 82 | }, 83 | # --- MOSS algorithm 84 | { 85 | "archtype": MOSS, 86 | "params": {} 87 | }, 88 | # --- Exp3++ algorithm 89 | { 90 | "archtype": Exp3PlusPlus, 91 | "params": {} 92 | }, 93 | ]} 94 | ) 95 | 96 | # DONE 97 | print("Loaded experiments configuration from 'example_of_configuration_singleplayer.py' :") 98 | print("configuration =", configuration) # DEBUG 99 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | 4 | ARG PYTHON_VERSION=3.9 5 | 6 | 7 | # Install necessary building tools and dependencies 8 | RUN apt-get update && apt-get install -y --no-install-recommends \ 9 | build-essential \ 10 | git \ 11 | curl \ 12 | sudo \ 13 | bzip2 \ 14 | libx11-6 \ 15 | ca-certificates \ 16 | libjpeg-dev \ 17 | libpng-dev && \ 18 | rm -rf /var/lib/apt/lists/* 19 | 20 | RUN apt-get update && apt-get -y dist-upgrade && apt-get purge -y libboost-all-dev && \ 21 | apt-get install -f -y libboost-all-dev && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | 25 | # Create a working directory 26 | RUN mkdir /app 27 | WORKDIR /app 28 | 29 | 30 | # Install conda 31 | RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ 32 | chmod +x ~/miniconda.sh && \ 33 | ~/miniconda.sh -b -p /opt/conda && \ 34 | rm ~/miniconda.sh && \ 35 | /opt/conda/bin/conda install -y python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl mkl-include ninja cython typing && \ 36 | /opt/conda/bin/conda clean -ya 37 | 38 | 39 | ENV PATH /opt/conda/bin:$PATH 40 | 41 | RUN apt-get update 42 | RUN apt-get install -y vim lshw 43 | RUN apt-get install -y ghostscript 44 | RUN apt-get install -y poppler-utils 45 | 46 | ENV LANG C.UTF-8 47 | 48 | RUN export CC=mpicc && export HDF5_MPI="ON" && /opt/conda/bin/pip install versioned-hdf5 49 | 50 | # Install data science and machine learning packages using conda 51 | RUN /opt/conda/bin/conda install -y -c conda-forge joblib numba tqdm && \ 52 | /opt/conda/bin/conda install -y -c conda-forge sphinx_rtd_theme recommonmark nbsphinx ipywidgets && \ 53 | /opt/conda/bin/conda clean -ya 54 | 55 | 56 | # Install data science and machine learning packages using conda 57 | RUN /opt/conda/bin/conda install -y -c conda-forge scikit-learn scikit-optimize pandas seaborn && \ 58 | /opt/conda/bin/conda clean -ya 59 | 60 | RUN /opt/conda/bin/conda install -y -c conda-forge matplotlib">=3.4.3" && \ 61 | /opt/conda/bin/conda install -c anaconda python-dateutil && \ 62 | /opt/conda/bin/conda clean -ya 63 | 64 | RUN /opt/conda/bin/conda install -y -c anaconda ujson && \ 65 | /opt/conda/bin/conda clean -ya 66 | 67 | 68 | # Install Jupyter 69 | RUN /opt/conda/bin/conda install -y -c conda-forge jupyterlab jupyter_http_over_ws nodejs">=16.0.0" && \ 70 | /opt/conda/bin/conda clean -ya 71 | 72 | RUN /opt/conda/bin/conda install -y -c anaconda nbformat && \ 73 | /opt/conda/bin/conda install -y ipykernel && \ 74 | 
/opt/conda/bin/conda clean -ya 75 | RUN jupyter serverextension enable --py jupyter_http_over_ws 76 | 77 | RUN python3 -m ipykernel.kernelspec 78 | -------------------------------------------------------------------------------- /strategic_scripts/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from pathlib import Path 4 | 5 | from run_experiment import runner 6 | 7 | 8 | if __name__ == "__main__": 9 | # python3 main.py --setup=default.json > log/default.log 2>&1 & 10 | # python3 main.py --setup=05X50.json > log/05X50.log 2>&1 & 11 | # ps | grep python3 main.py 12 | parser = argparse.ArgumentParser(description="Get json file containing experiment setup") 13 | parser.add_argument("--setup", type=str) 14 | parser.add_argument("--policy", type=str) 15 | parser.add_argument("--L", type=int) 16 | args = parser.parse_args() 17 | 18 | setup_file = str(args.setup) 19 | policy_str = str(args.policy) 20 | L = int(args.L) if args.L else None 21 | assert ".json" in setup_file, "setup file name should contain '.json'" 22 | 23 | AVALIABLE_POLICY = [ 24 | 'UCB', 25 | 'H_UCB', 26 | 'RH_UCB', 27 | 'Sampled_R_UCB', 28 | ] 29 | assert policy_str in AVALIABLE_POLICY, "policy is not available" 30 | 31 | with open(setup_file) as json_file: 32 | data = json.load(json_file) 33 | 34 | experiment_name = str(Path(setup_file).stem) 35 | 36 | horizon = data["horizon"] 37 | repetitions = data["repetitions"] 38 | n_jobs = data["n_jobs"] 39 | verbosity = data["verbosity"] 40 | 41 | arm_type = data["arm_type"] 42 | agent_arm_dict = data["agent_arm_dict"] 43 | 44 | save_json = data["save_json"] 45 | save_h5py = data["save_h5py"] 46 | save_pickle = data["save_pickle"] 47 | 48 | runner( 49 | experiment_name, policy_str, 50 | horizon, repetitions, n_jobs, verbosity, 51 | arm_type, agent_arm_dict, L, 52 | save_json, save_h5py, save_pickle 53 | ) 54 | -------------------------------------------------------------------------------- /strategic_scripts/run_h_ucb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # currentShellPID=$(echo $$) 4 | # echo $currentShellPID && taskset -cp 25-49 $currentShellPID && echo $currentShellPID 5 | 6 | 7 | declare -a setupnames=( 8 | "N100_05X100" 9 | ) 10 | 11 | for i in "${setupnames[@]}" 12 | do 13 | python3 main.py --setup=setups/${i}.json --policy=H_UCB > log/${i}_H_UCB.log 2>&1 14 | done 15 | 16 | # python3 main.py --setup=setups/N100_05X100.json --policy=UCB -------------------------------------------------------------------------------- /strategic_scripts/run_rh_ucb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # currentShellPID=$(echo $$) 4 | # echo $currentShellPID && taskset -cp 50-62 $currentShellPID && echo $currentShellPID 5 | 6 | 7 | declare -a setupnames=( 8 | "N100_05X100" 9 | ) 10 | 11 | for i in "${setupnames[@]}" 12 | do 13 | python3 main.py --setup=setups/${i}.json --policy=RH_UCB > log/${i}_RH_UCB.log 2>&1 14 | done 15 | -------------------------------------------------------------------------------- /strategic_scripts/run_sampled_r_ucb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # currentShellPID=$(echo $$) 4 | # echo $currentShellPID && taskset -cp 63-75 $currentShellPID && echo $currentShellPID 5 | 6 | 7 | declare -a basicsetupnames=( 8 | "N100_05X100" 9 | ) 10 | 11 | for i in "${basicsetupnames[@]}" 12 | do 
13 | python3 main.py --setup=setups/${i}.json --policy=Sampled_R_UCB --L=5 > log/${i}_Sampled_R_UCB_L5.log 2>&1 14 | done 15 | -------------------------------------------------------------------------------- /strategic_scripts/run_ucb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # currentShellPID=$(echo $$) 4 | # echo $currentShellPID && taskset -cp 0-24 $currentShellPID && echo $currentShellPID 5 | 6 | 7 | declare -a setupnames=( 8 | "N100_05X100" 9 | ) 10 | 11 | for i in "${setupnames[@]}" 12 | do 13 | python3 main.py --setup=setups/${i}.json --policy=UCB > log/${i}_UCB.log 2>&1 14 | done 15 | 16 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X100.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [100]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X200.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [200]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X300.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [300]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X400.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [400]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_05X500.json: -------------------------------------------------------------------------------- 
1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [500]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X100.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [100]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X200.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [200]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X300.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [300]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X400.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [400]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_09X500.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [500]}, 10 | "1": 
{"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_rh_ucb_best_10_100_replicate1000X3.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9, 0.2, 0.1], "repeat": [10, 100, 100]}, 10 | "1": {"param": [0.8, 0.2, 0.1], "repeat": [10, 100, 100]}, 11 | "2": {"param": [0.7, 0.2, 0.1], "repeat": [1000, 1000, 1000]}, 12 | "3": {"param": [0.6, 0.2, 0.1], "repeat": [1000, 1000, 1000]}, 13 | "4": {"param": [0.5, 0.2, 0.1], "repeat": [1000, 1000, 1000]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_single_origin_arm1000X1.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1]}, 11 | "2": {"param": [0.7], "repeat": [1]}, 12 | "3": {"param": [0.6], "repeat": [1]}, 13 | "4": {"param": [0.5], "repeat": [1000]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | -------------------------------------------------------------------------------- /strategic_scripts/setups/N100_single_origin_arm1000X4.json: -------------------------------------------------------------------------------- 1 | { 2 | "horizon": 100000, 3 | "repetitions": 100, 4 | "n_jobs": 50, 5 | "verbosity": 0, 6 | 7 | "arm_type": "Bernoulli", 8 | "agent_arm_dict": { 9 | "0": {"param": [0.9], "repeat": [1]}, 10 | "1": {"param": [0.8], "repeat": [1000]}, 11 | "2": {"param": [0.7], "repeat": [1000]}, 12 | "3": {"param": [0.6], "repeat": [1000]}, 13 | "4": {"param": [0.5], "repeat": [1000]} 14 | }, 15 | 16 | "save_json": true, 17 | "save_h5py": false, 18 | "save_pickle": false 19 | } 20 | --------------------------------------------------------------------------------