├── beer_game_env ├── envs │ ├── __init__.py │ └── env.py └── __init__.py ├── docs ├── open_analytics_screen.png └── README.md ├── setup.py ├── README.md ├── .gitignore └── LICENSE /beer_game_env/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from beer_game_env.envs.env import BeerGame 2 | -------------------------------------------------------------------------------- /docs/open_analytics_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orlov-ai/beer-game-env/HEAD/docs/open_analytics_screen.png -------------------------------------------------------------------------------- /beer_game_env/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='BeerGame-v0', 5 | entry_point='beer_game_env.envs:BeerGame', 6 | ) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='beer_game_env', 4 | version='0.0.1', 5 | author='Alexander Orlov', 6 | install_requires=['gym', 'numpy', 'cloudpickle'] 7 | ) 8 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | Here is an example screen from https://beergame.opexanalytics.com/ 2 | 3 | I will use it to explain state variables in the environment, so you can better understand position of agents and 4 | order inside state variables. `env` is an instance of class `BeerGame`. 
5 | 6 | ``` 7 | env.orders = [[11, 10], [15, 16], [11, 14], [8]] 8 | env.shipments = [[4, 8], [10, 6], [4, 8], [4, 4]] 9 | env.stocks = [-4, -13, -6, 6] 10 | env.turn = 27 11 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # beer-game-env 2 | Beer Game implemented as an OpenAI Gym environment. 3 | 4 | Installation: 5 | 6 | 1. Create a new conda environment to keep things clean 7 | ``` 8 | conda create python=3.6 --name beer-game-env 9 | source activate beer-game-env 10 | ``` 11 | 12 | 2. Clone the environment repository 13 | ``` 14 | git clone https://github.com/orlov-ai/beer-game-env 15 | ``` 16 | 17 | 3. Point to root repository and install the package 18 | ``` 19 | cd beer-game-env 20 | pip install -e . 21 | ``` 22 | 23 | To use: 24 | ``` 25 | import gym 26 | import beer_game_env 27 | env = gym.make('BeerGame-v0', n_agents=4, env_type='classical') 28 | ``` 29 | 30 | tested with gym version `gym==0.14.0` 31 | 32 | Need a feature? Have a problem? Just start an issue. 33 | PRs are always welcome. 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Jupyter Notebook 53 | .ipynb_checkpoints 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 orlov-ai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
"""Beer Game supply-chain simulation exposed as an OpenAI Gym environment.

Source file: beer_game_env/envs/env.py

The module provides a few helpers for building (optionally noisy) initial
states and flattening observations, plus the :class:`BeerGame` environment
itself, in which ``n_agents`` chained agents each place one order per turn.
"""
import itertools
from collections import deque

import cloudpickle
import gym
import numpy as np
from gym import error, spaces
from gym.utils import seeding

# Every env_type that ``BeerGame.reset`` knows how to initialise.
_ENV_TYPES = ['classical', 'uniform_0_2', 'normal_10_4']

# Types treated as "a single number" when deciding whether a container is a
# flat list or a list of lists.  NumPy scalar families are included so that
# actions passed as NumPy arrays of any integer/float width are recognised.
_SCALAR_TYPES = (float, int, np.integer, np.floating)


def _is_flat(init):
    """Return True when every element of ``init`` is a scalar number."""
    return all(isinstance(x, _SCALAR_TYPES) for x in init)


def add_noise_to_init(init, noise):
    """
    Add noise to initial values and truncate the result to integers.

    :param init: flat list of numbers, or a list of lists of numbers
    :param noise: 1-dimensional np.array holding one noise value per element
        of ``init`` (rows of a nested ``init`` consume it left to right)
    :return: list of ints (flat ``init``) or list of lists of ints (nested)
    """
    # TODO add continuous variant
    if _is_flat(init):
        return (np.array(init) + noise).astype(int).tolist()

    with_noise = []
    offset = 0  # position in ``noise`` of the first value for the current row
    for row in init:
        noisy_row = np.array(row) + noise[offset:offset + len(row)]
        with_noise.append(noisy_row.astype(int).tolist())
        offset += len(row)
    return with_noise


def get_init_len(init):
    """
    Calculate total number of elements in a 1D array or list of lists.

    :param init: flat iterable of numbers, or a list of lists
    :rtype: int
    """
    if _is_flat(init):
        return len(init)
    return len(list(itertools.chain.from_iterable(init)))


def transform_obs(x: dict):
    """
    Transform a single agent's observation dict (one step) to a flat array.

    The layout is ``[next_incoming_order, current_stock, cum_cost,
    *shipments, *orders]``.

    :param x: observation dict as produced by ``BeerGame._get_observations``
    :rtype: np.array
    """
    return np.array((x['next_incoming_order'], x['current_stock'], x['cum_cost'],
                     *x['shipments'], *x['orders']))


def state_dict_to_array(state_dict: dict):
    """
    Transform an observation dict (current step and, when the optional
    ``'prev'`` key is present, previous steps) to a single flat array.

    :param state_dict: observation dict, optionally holding a list of earlier
        observation dicts under ``'prev'``
    :rtype: np.array
    """
    # TODO: this function is currently not used by the environment
    current_obs = transform_obs(state_dict)
    if 'prev' not in state_dict:
        return current_obs
    prev_obs = np.hstack([transform_obs(x) for x in state_dict['prev']])
    return np.hstack((current_obs, prev_obs))


class BeerGame(gym.Env):
    """
    Multi-agent Beer Game environment.

    Agent ``0`` faces the external demand; agent ``i`` receives the orders
    placed by agent ``i - 1`` and ships to it.  The last agent's own orders are
    always fulfilled in full after the pipeline delay.

    :param n_agents: number of agents in the chain
    :param env_type: one of ``'classical'``, ``'uniform_0_2'``, ``'normal_10_4'``;
        selects the initial state, demand process and cost weights
    :param n_turns_per_game: number of turns before the episode is done
    :param add_noise_initialization: perturb the initial state on ``reset``
    :param seed: seed for the environment's random number generator
    :raises NotImplementedError: if ``env_type`` is not supported
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, n_agents: int, env_type: str, n_turns_per_game=20,
                 add_noise_initialization=False, seed=None):
        super().__init__()
        self.orders = []
        self.shipments = []
        self.next_incoming_orders = []
        self.stocks = []
        self.holding_cost = None
        self.stockout_cost = None
        self.cum_holding_cost = None
        self.cum_stockout_cost = None
        self.turns = None
        self.score_weight = None
        self.turn = None
        self.done = True  # forces a reset() before the first step()
        self.n_states_concatenated = None
        self.prev_states = None
        self.np_random = None

        self.n_agents = n_agents
        self.env_type = env_type
        if self.env_type not in _ENV_TYPES:
            raise NotImplementedError("env_type must be in ['classical', 'uniform_0_2', 'normal_10_4']")

        self.n_turns = n_turns_per_game
        self.add_noise_initialization = add_noise_initialization
        self.seed(seed)

        # TODO calculate state shape
        # self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)

    def _save(self):
        """
        Serialize the environment to a pickle byte string.

        :rtype: bytes
        """
        return cloudpickle.dumps(self)

    def _load(self, pickle_string):
        """
        Restore the environment's attributes from a string made by ``_save``.

        SECURITY: unpickling executes arbitrary code; only pass strings that
        come from a trusted source.
        """
        self.__dict__.update(cloudpickle.loads(pickle_string).__dict__)

    def _get_observations(self):
        """Build one observation dict per agent from the current state."""
        return [
            {'current_stock': self.stocks[i], 'turn': self.turn,
             'cum_cost': self.cum_holding_cost[i] + self.cum_stockout_cost[i],
             'shipments': list(self.shipments[i]),
             # orders are reported newest-first
             'orders': list(self.orders[i])[::-1],
             'next_incoming_order': self.next_incoming_orders[i]}
            for i in range(self.n_agents)
        ]

    def _get_rewards(self):
        """Per-agent reward: the negated cost incurred on the last step."""
        return -(self.holding_cost + self.stockout_cost)

    def _get_demand(self):
        """External demand for the current turn."""
        return self.turns[self.turn]

    def seed(self, seed=None):
        """(Re)create the environment's random generator; returns ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _uniform_int_noise(self, half_width, size):
        """Integer noise drawn uniformly from ``[-half_width, half_width]``."""
        return self.np_random.choice(np.arange(2 * half_width + 1), size=size) - half_width

    def _clipped_normal_noise(self, scale, size):
        """Zero-mean Gaussian noise clipped to ``[-10, 10]``."""
        noise = self.np_random.normal(loc=0, scale=scale, size=size)
        return np.clip(noise, -10, 10)

    def _init_state(self, base, stock, flow_noise, stock_noise):
        """
        Set ``next_incoming_orders`` and ``stocks`` and return the initial
        order and shipment pipelines.

        :param base: initial value of every order / shipment in the pipelines
        :param stock: initial stock of every agent
        :param flow_noise: callable ``size -> np.array`` producing noise for
            orders, shipments and incoming orders
        :param stock_noise: callable ``size -> np.array`` producing stock noise
        :return: ``(orders, shipments)`` as lists of lists
        """
        # The last agent has a one-slot order pipeline, all others two slots.
        orders = [[base, base] for _ in range(self.n_agents - 1)] + [[base]]
        shipments = [[base, base] for _ in range(self.n_agents)]
        self.next_incoming_orders = [base] * self.n_agents
        self.stocks = [stock] * self.n_agents

        if self.add_noise_initialization:
            orders = add_noise_to_init(orders, flow_noise(get_init_len(orders)))
            shipments = add_noise_to_init(shipments, flow_noise(get_init_len(shipments)))
            self.next_incoming_orders = add_noise_to_init(
                self.next_incoming_orders, flow_noise(get_init_len(self.next_incoming_orders)))
            self.stocks = add_noise_to_init(self.stocks, stock_noise(get_init_len(self.stocks)))
        return orders, shipments

    def reset(self):
        """
        Start a new episode and return the initial per-agent observations.

        All randomness (demand and, when enabled, initialisation noise) is
        drawn from ``self.np_random`` so that a seeded environment is
        reproducible.

        :return: list with one observation dict per agent
        :raises ValueError: if ``env_type`` is not supported
        """
        self.done = False

        if self.env_type == 'classical':
            # flow noise is uniform [-2, 2], stock noise uniform [-6, 6]
            temp_orders, temp_shipments = self._init_state(
                base=4, stock=12,
                flow_noise=lambda size: self._uniform_int_noise(2, size),
                stock_noise=lambda size: self._uniform_int_noise(6, size))
            # demand steps from 4 to 8 after the fourth turn
            self.turns = [4] * 4 + [8] * (self.n_turns - 4)
            self.score_weight = [[0.5] * self.n_agents, [1] * self.n_agents]

        elif self.env_type == 'uniform_0_2':
            # flow noise is uniform [-1, 1], stock noise uniform [-2, 2]
            temp_orders, temp_shipments = self._init_state(
                base=1, stock=4,
                flow_noise=lambda size: self._uniform_int_noise(1, size),
                stock_noise=lambda size: self._uniform_int_noise(2, size))
            # integer demand uniform on {0, 1, 2}
            self.turns = self.np_random.uniform(low=0, high=3, size=self.n_turns).astype(int)
            self.score_weight = [[0.5] * self.n_agents, [1] * self.n_agents]

        elif self.env_type == 'normal_10_4':
            # noise is Gaussian (std 5 for flows, 4 for stocks) clipped to
            # [-10, 10] so that orders and shipments cannot become negative
            temp_orders, temp_shipments = self._init_state(
                base=10, stock=40,
                flow_noise=lambda size: self._clipped_normal_noise(5, size),
                stock_noise=lambda size: self._clipped_normal_noise(4, size))
            self.turns = self.np_random.normal(loc=10, scale=4, size=self.n_turns)
            self.turns = np.clip(self.turns, 0, 1000).astype(int)
            # dqn paper page 24
            # NOTE(review): the holding weights repeat the 4-element pattern, so
            # with more than 4 agents the weights cycle -- confirm this is intended.
            self.score_weight = [[1.0, 0.75, 0.5, 0.25] * self.n_agents,
                                 [10.0] + [0.0] * (self.n_agents - 1)]

        else:
            raise ValueError('wrong env_type')

        # initialize other variables
        self.holding_cost = np.zeros(self.n_agents, dtype=float)
        self.stockout_cost = np.zeros(self.n_agents, dtype=float)
        self.cum_holding_cost = np.zeros(self.n_agents, dtype=float)
        self.cum_stockout_cost = np.zeros(self.n_agents, dtype=float)
        self.orders = [deque(x) for x in temp_orders]
        self.shipments = [deque(x) for x in temp_shipments]
        self.turn = 0
        self.done = False

        # the history queue starts out filled with copies of the initial state
        self.n_states_concatenated = 3
        prev_state = self._get_observations()
        self.prev_states = deque([prev_state] * (self.n_states_concatenated - 1))
        return self._get_observations()

    def render(self, mode='human'):
        """
        Print the current state of the game to stdout.

        :raises NotImplementedError: for any mode other than ``'human'``
        """
        if mode != 'human':
            raise NotImplementedError(f'Render mode {mode} is not implemented yet')
        print('\n' + '=' * 20)
        print('Turn: ', self.turn)
        print('Stocks: ', ", ".join([str(x) for x in self.stocks]))
        print('Orders: ', [list(x) for x in self.orders])
        print('Shipments:', [list(x) for x in self.shipments])
        print('Last incoming orders: ', self.next_incoming_orders)
        print('Cum holding cost: ', self.cum_holding_cost)
        print('Cum stockout cost: ', self.cum_stockout_cost)
        print('Last holding cost: ', self.holding_cost)
        print('Last stockout cost:', self.stockout_cost)

    def step(self, action: list):
        """
        Advance the game by one turn.

        :param action: one non-negative order quantity per agent
        :return: ``(observations, rewards, done, info)`` where ``observations``
            is a list of per-agent dicts and ``rewards`` a per-agent np.array
        :raises gym.error.ResetNeeded: if the episode is already finished
        :raises gym.error.InvalidAction: if ``action`` has the wrong length or
            contains a negative order
        """
        # sanity checks
        if self.done:
            raise error.ResetNeeded('Environment is finished, please run env.reset() before taking actions')
        if get_init_len(action) != self.n_agents:
            raise error.InvalidAction(f'Length of action array must be same as n_agents({self.n_agents})')
        if any(np.array(action) < 0):
            raise error.InvalidAction(f"You can't order negative amount. You agents actions are: {action}")

        # self.prev_states is a fixed-length queue of previous observations
        self.prev_states.popleft()
        self.prev_states.append(self._get_observations())

        # take the oldest order and shipment out of every agent's pipeline
        demand = self._get_demand()
        orders_inc = [order.popleft() for order in self.orders]
        ship_inc = [shipment.popleft() for shipment in self.shipments]

        # calculate shipments respecting orders and stock levels: agent i is
        # supplied by agent i + 1, which can ship at most what it has on hand
        # and owes at most the new order plus its existing backlog
        for i in range(self.n_agents - 1):
            supplier_stock = self.stocks[i + 1]
            available = max(0, supplier_stock) + ship_inc[i + 1]  # stock + incoming shipment
            owed = orders_inc[i] + max(0, -supplier_stock)  # incoming order + stockout
            self.shipments[i].append(min(owed, available))
        # the last agent's orders are always fulfilled in full
        self.shipments[-1].append(orders_inc[-1])

        # update stocks: add what arrived, subtract what was ordered from us
        self.stocks = [(stock + inc) for stock, inc in zip(self.stocks, ship_inc)]
        for i in range(1, self.n_agents):
            self.stocks[i] -= orders_inc[i - 1]
        self.stocks[0] -= demand

        # update orders
        for i in range(self.n_agents):
            self.orders[i].append(action[i])
        self.next_incoming_orders = [self._get_demand()] + [x[0] for x in self.orders[:-1]]

        # calculate costs: positive stock is charged the holding weight,
        # negative stock (backlog) the stockout weight
        self.holding_cost = np.zeros(self.n_agents, dtype=float)
        self.stockout_cost = np.zeros(self.n_agents, dtype=float)
        for i, stock in enumerate(self.stocks):
            if stock >= 0:
                self.holding_cost[i] = stock * self.score_weight[0][i]
            else:
                self.stockout_cost[i] = -stock * self.score_weight[1][i]
        self.cum_holding_cost += self.holding_cost
        self.cum_stockout_cost += self.stockout_cost
        # calculate reward
        rewards = self._get_rewards()

        # check if done
        if self.turn == self.n_turns - 1:
            self.done = True
        else:
            self.turn += 1

        state = self._get_observations()
        # TODO flatten observation dict
        return state, rewards, self.done, {}


def _demo():
    """Play one classical game with random orders, rendering every turn."""
    env = BeerGame(n_agents=4, env_type='classical')
    start_state = env.reset()
    for i, obs in enumerate(start_state):
        print(f'Agent {i} observation: {obs}')
    env.render()
    done = False
    while not done:
        actions = np.random.uniform(0, 16, size=4)
        actions = actions.astype(int)
        step_state, step_rewards, done, _ = env.step(actions)
        env.render()

    # you can also save and load environment via
    # canned_env = env._save()
    # env._load(canned_env)


if __name__ == '__main__':
    _demo()