├── beer_game_env
    ├── envs
    │   ├── __init__.py
    │   └── env.py
    └── __init__.py
├── docs
    ├── open_analytics_screen.png
    └── README.md
├── setup.py
├── README.md
├── .gitignore
└── LICENSE


/beer_game_env/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from beer_game_env.envs.env import BeerGame
2 | 


--------------------------------------------------------------------------------
/docs/open_analytics_screen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orlov-ai/beer-game-env/HEAD/docs/open_analytics_screen.png


--------------------------------------------------------------------------------
/beer_game_env/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 | 
# Make the environment available to `gym.make` under the id 'BeerGame-v0'.
register(id='BeerGame-v0', entry_point='beer_game_env.envs:BeerGame')


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
# Package metadata. The packages are listed explicitly so that a regular
# (non-editable) `pip install .` also ships the environment code instead of
# relying on the source checkout being on sys.path.
setup(name='beer_game_env',
      version='0.0.1',
      author='Alexander Orlov',
      packages=['beer_game_env', 'beer_game_env.envs'],
      install_requires=['gym', 'numpy', 'cloudpickle']
)
8 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | Here is an example screen from https://beergame.opexanalytics.com/
 2 | 
 3 | I will use it to explain the state variables of the environment, so you can better understand the position of 
 4 | each agent and the ordering of values inside the state variables. `env` is an instance of class `BeerGame`.
 5 | 
 6 | ```
 7 | env.orders = [[11, 10], [15, 16], [11, 14], [8]]
 8 | env.shipments = [[4, 8], [10, 6], [4, 8], [4, 4]]
 9 | env.stocks = [-4, -13, -6, 6]
10 | env.turn = 27 
11 | ``` 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # beer-game-env
 2 | Beer Game implemented as an OpenAI Gym environment.
 3 | 
 4 | Installation:
 5 | 
 6 | 1. Create a new conda environment to keep things clean
 7 | ```
 8 | conda create python=3.6 --name beer-game-env
 9 | source activate beer-game-env
10 | ```
11 | 
12 | 2. Clone the environment repository
13 | ```
14 | git clone https://github.com/orlov-ai/beer-game-env
15 | ```
16 | 
 17 | 3. Change into the repository root and install the package
18 | ```
19 | cd beer-game-env
20 | pip install -e .
21 | ```
22 | 
23 | To use:
24 | ```
25 | import gym
26 | import beer_game_env
27 | env = gym.make('BeerGame-v0', n_agents=4, env_type='classical')
28 | ```
29 | 
 30 | Tested with `gym==0.14.0`.
31 | 
32 | Need a feature? Have a problem? Just start an issue.
33 | PRs are always welcome.
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea/
 2 | 
 3 | # Byte-compiled / optimized / DLL files
 4 | __pycache__/
 5 | *.py[cod]
 6 | *$py.class
 7 | 
 8 | # C extensions
 9 | *.so
10 | 
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | 
30 | # PyInstaller
31 | #  Usually these files are written by a python script from a template
32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 | .pytest_cache/
51 | 
52 | # Jupyter Notebook
53 | .ipynb_checkpoints
54 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 orlov-ai
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/beer_game_env/envs/env.py:
--------------------------------------------------------------------------------
  1 | import cloudpickle
  2 | import gym
  3 | from gym import error, spaces
  4 | from gym.utils import seeding
  5 | import itertools
  6 | from collections import deque
  7 | import numpy as np
  8 | 
  9 | 
 10 | def add_noise_to_init(init, noise):
 11 |     """
 12 |     Add noise to initial values.
 13 |     :type init: iterable, list or (list of lists)
 14 |     :type noise: np.array, 1-dimensional
 15 |     :rtype with_noise: np.array or (list of np.arrays)
 16 |     """
 17 |     # TODO add continuous variant
 18 |     is_init_array = all([isinstance(x, (float, int)) for x in init])
 19 | 
 20 |     if is_init_array:  # init is a list
 21 |         with_noise = (np.array(init) + noise).astype(int).tolist()
 22 |     else:  # init is a lists of lists
 23 |         with_noise = []
 24 |         c = 0
 25 |         for row in init:
 26 |             noise_row = np.array(row) + noise[c:(c + len(row))]
 27 |             noise_row = noise_row.astype(int).tolist()
 28 |             c += len(noise_row)
 29 |             with_noise.append(noise_row)
 30 | 
 31 |     return with_noise
 32 | 
 33 | 
 34 | def get_init_len(init):
 35 |     """
 36 |     Calculate total number of elements in a 1D array or list of lists.
 37 |     :type init: iterable, list or (list of lists)
 38 |     :rtype: int
 39 |     """
 40 |     is_init_array = all([isinstance(x, (float, int, np.int64)) for x in init])
 41 |     if is_init_array:
 42 |         init_len = len(init)
 43 |     else:
 44 |         init_len = len(list(itertools.chain.from_iterable(init)))
 45 |     return init_len
 46 | 
 47 | 
 48 | def transform_obs(x: dict):
 49 |     """
 50 |     transform dict of observations (one step) to an array
 51 |     :param x: dict
 52 |     :rtype: np.array
 53 |     """
 54 |     return np.array((x['next_incoming_order'], x['current_stock'], x['cum_cost'], *x['shipments'], *x['orders']))
 55 | 
 56 | 
 57 | def state_dict_to_array(state_dict: dict):
 58 |     """
 59 |     transform dict of observations (current step and previous steps) to an array
 60 |     :param state_dict:
 61 |     :rtype: np.array
 62 |     """
 63 |     # todo in this state this function is not use, need to use it
 64 |     current_obs = transform_obs(state_dict)
 65 |     if 'prev' in state_dict:
 66 |         prev_obs = np.hstack([transform_obs(x) for x in state_dict['prev']])
 67 |         flatten = np.hstack((current_obs, prev_obs))
 68 |     else:
 69 |         flatten = current_obs
 70 |     return flatten
 71 | 
 72 | 
 73 | class BeerGame(gym.Env):
 74 |     metadata = {'render.modes': ['human']}
 75 | 
 76 |     def __init__(self, n_agents: int, env_type: str, n_turns_per_game=20,
 77 |                  add_noise_initialization=False, seed=None):
 78 |         super().__init__()
 79 |         self.orders = []
 80 |         self.shipments = []
 81 |         self.next_incoming_orders = []
 82 |         self.stocks = []
 83 |         self.holding_cost = None
 84 |         self.stockout_cost = None
 85 |         self.cum_holding_cost = None
 86 |         self.cum_stockout_cost = None
 87 |         self.turns = None
 88 |         self.score_weight = None
 89 |         self.turn = None
 90 |         self.done = True
 91 |         self.n_states_concatenated = None
 92 |         self.prev_states = None
 93 |         self.np_random = None
 94 | 
 95 |         self.n_agents = n_agents
 96 |         self.env_type = env_type
 97 |         if self.env_type not in ['classical', 'uniform_0_2', 'normal_10_4']:
 98 |             raise NotImplementedError("env_type must be in ['classical', 'uniform_0_2', 'normal_10_4']")
 99 | 
100 |         self.n_turns = n_turns_per_game
101 |         self.add_noise_initialization = add_noise_initialization
102 |         self.seed(seed)
103 | 
104 |         # TODO calculate state shape
105 |         #self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
106 | 
107 |     def _save(self):
108 |         """
109 |         serialize environment to a pickle string
110 |         :rtype: string
111 |         """
112 |         canned = cloudpickle.dumps(self)
113 |         return canned
114 | 
115 |     def _load(self, pickle_string):
116 |         """
117 |         deserialize environment from a pickle string
118 |         """
119 |         self.__dict__.update(cloudpickle.loads(pickle_string).__dict__)
120 | 
121 |     def _get_observations(self):
122 |         observations = [None] * self.n_agents
123 |         for i in range(self.n_agents):
124 |             observations[i] = {'current_stock': self.stocks[i], 'turn': self.turn,
125 |                                'cum_cost': self.cum_holding_cost[i] + self.cum_stockout_cost[i],
126 |                                'shipments': list(self.shipments[i]), 'orders': list(self.orders[i])[::-1],
127 |                                'next_incoming_order': self.next_incoming_orders[i]}
128 |         return observations
129 | 
130 |     def _get_rewards(self):
131 |         return -(self.holding_cost + self.stockout_cost)
132 | 
133 |     def _get_demand(self):
134 |         return self.turns[self.turn]
135 | 
136 |     def seed(self, seed=None):
137 |         self.np_random, seed = seeding.np_random(seed)
138 |         return [seed]
139 | 
140 |     def reset(self):
141 |         self.done = False
142 | 
143 |         if self.env_type == 'classical':
144 |             temp_orders = [[4, 4]] * (self.n_agents - 1) + [[4]]
145 |             temp_shipments = [[4, 4]] * self.n_agents
146 |             self.next_incoming_orders = [4] * self.n_agents
147 |             self.stocks = [12] * self.n_agents
148 | 
149 |             if self.add_noise_initialization:
150 |                 # noise is uniform [-2,2]
151 |                 orders_noise = np.random.choice(np.arange(5), size=get_init_len(temp_orders)) - 2
152 |                 temp_orders = add_noise_to_init(temp_orders, orders_noise)
153 | 
154 |                 shipments_noise = np.random.choice(np.arange(5), size=get_init_len(temp_shipments)) - 2
155 |                 temp_shipments = add_noise_to_init(temp_shipments, shipments_noise)
156 | 
157 |                 last_incoming_orders_noise = np.random.choice(np.arange(5),
158 |                                                               size=get_init_len(self.next_incoming_orders)) - 2
159 |                 self.next_incoming_orders = add_noise_to_init(self.next_incoming_orders, last_incoming_orders_noise)
160 | 
161 |                 stocks_noise = np.random.choice(np.arange(13), size=get_init_len(self.stocks)) - 6
162 |                 self.stocks = add_noise_to_init(self.stocks, stocks_noise)
163 | 
164 |             self.turns = [4] * 4 + [8] * (self.n_turns - 4)
165 |             self.score_weight = [[0.5] * self.n_agents, [1] * self.n_agents]
166 | 
167 |         elif self.env_type == 'uniform_0_2':
168 |             temp_orders = [[1, 1]] * (self.n_agents - 1) + [[1]]
169 |             temp_shipments = [[1, 1]] * self.n_agents
170 |             self.next_incoming_orders = [1] * self.n_agents
171 |             self.stocks = [4] * self.n_agents
172 | 
173 |             if self.add_noise_initialization:
174 |                 # noise is uniform [-1,1]
175 |                 orders_noise = np.random.choice(np.arange(3), size=get_init_len(temp_orders)) - 1
176 |                 temp_orders = add_noise_to_init(temp_orders, orders_noise)
177 | 
178 |                 shipments_noise = np.random.choice(np.arange(3), size=get_init_len(temp_shipments)) - 1
179 |                 temp_shipments = add_noise_to_init(temp_shipments, shipments_noise)
180 | 
181 |                 last_incoming_orders_noise = np.random.choice(np.arange(3),
182 |                                                               size=get_init_len(self.next_incoming_orders)) - 1
183 |                 self.next_incoming_orders = add_noise_to_init(self.next_incoming_orders, last_incoming_orders_noise)
184 | 
185 |                 stocks_noise = np.random.choice(np.arange(5), size=get_init_len(self.stocks)) - 2
186 |                 self.stocks = add_noise_to_init(self.stocks, stocks_noise)
187 | 
188 |             # uniform [0, 2]
189 |             self.turns = self.np_random.uniform(low=0, high=3, size=self.n_turns).astype(np.int)
190 |             self.score_weight = [[0.5] * self.n_agents, [1] * self.n_agents]
191 | 
192 |         elif self.env_type == 'normal_10_4':
193 |             temp_orders = [[10, 10]] * (self.n_agents - 1) + [[10]]
194 |             temp_shipments = [[10, 10]] * self.n_agents
195 |             self.next_incoming_orders = [10] * self.n_agents
196 |             self.stocks = [40] * self.n_agents
197 | 
198 |             if self.add_noise_initialization:
199 |                 # noise is uniform [-1,1]
200 |                 orders_noise = np.random.normal(loc=0, scale=5, size=get_init_len(temp_orders))
201 |                 orders_noise = np.clip(orders_noise, -10, 10)  # clip to prevent negative orders
202 |                 temp_orders = add_noise_to_init(temp_orders, orders_noise)
203 | 
204 |                 shipments_noise = np.random.normal(loc=0, scale=5, size=get_init_len(temp_shipments))
205 |                 shipments_noise = np.clip(shipments_noise, -10, 10)  # clip to prevent negative shipments
206 |                 temp_shipments = add_noise_to_init(temp_shipments, shipments_noise)
207 | 
208 |                 last_incoming_orders_noise = np.random.normal(loc=0,
209 |                                                               scale=5, size=get_init_len(self.next_incoming_orders))
210 |                 last_incoming_orders_noise = np.clip(last_incoming_orders_noise, -10, 10)
211 |                 self.next_incoming_orders = add_noise_to_init(self.next_incoming_orders, last_incoming_orders_noise)
212 | 
213 |                 stocks_noise = np.random.normal(loc=0, scale=4, size=get_init_len(self.stocks))
214 |                 stocks_noise = np.clip(stocks_noise, -10, 10)
215 |                 self.stocks = add_noise_to_init(self.stocks, stocks_noise)
216 | 
217 |             self.turns = self.np_random.normal(loc=10, scale=4, size=self.n_turns)
218 |             self.turns = np.clip(self.turns, 0, 1000).astype(np.int)
219 |             # dqn paper page 24
220 |             self.score_weight = [[1.0, 0.75, 0.5, 0.25] * self.n_agents, [10.0] + [0.0] * (self.n_agents - 1)]
221 | 
222 |         else:
223 |             raise ValueError('wrong env_type')
224 | 
225 |         # initialize other variables
226 |         self.holding_cost = np.zeros(self.n_agents, dtype=np.float)
227 |         self.stockout_cost = np.zeros(self.n_agents, dtype=np.float)
228 |         self.cum_holding_cost = np.zeros(self.n_agents, dtype=np.float)
229 |         self.cum_stockout_cost = np.zeros(self.n_agents, dtype=np.float)
230 |         self.orders = [deque(x) for x in temp_orders]
231 |         self.shipments = [deque(x) for x in temp_shipments]
232 |         self.turn = 0
233 |         self.done = False
234 | 
235 |         self.n_states_concatenated = 3
236 |         temp_obs = [None] * self.n_agents
237 |         for i in range(self.n_agents):
238 |             temp_obs[i] = {'current_stock': self.stocks[i], 'turn': self.turn,
239 |                            'cum_cost': self.cum_holding_cost[i] + self.cum_stockout_cost[i],
240 |                            'shipments': list(self.shipments[i]), 'orders': list(self.orders[i])[::-1],
241 |                            'next_incoming_order': self.next_incoming_orders[i]}
242 |         prev_state = temp_obs
243 |         self.prev_states = deque([prev_state] * (self.n_states_concatenated - 1))
244 |         return self._get_observations()
245 | 
246 |     def render(self, mode='human'):
247 |         if mode != 'human':
248 |             raise NotImplementedError(f'Render mode {mode} is not implemented yet')
249 |         print('\n' + '=' * 20)
250 |         print('Turn:     ', self.turn)
251 |         print('Stocks:   ', ", ".join([str(x) for x in self.stocks]))
252 |         print('Orders:   ', [list(x) for x in self.orders])
253 |         print('Shipments:', [list(x) for x in self.shipments])
254 |         print('Last incoming orders:  ', self.next_incoming_orders)
255 |         print('Cum holding cost:  ', self.cum_stockout_cost)
256 |         print('Cum stockout cost: ', self.cum_holding_cost)
257 |         print('Last holding cost: ', self.holding_cost)
258 |         print('Last stockout cost:', self.stockout_cost)
259 | 
260 |     def step(self, action: list):
261 |         # sanity checks
262 |         if self.done:
263 |             raise error.ResetNeeded('Environment is finished, please run env.reset() before taking actions')
264 |         if get_init_len(action) != self.n_agents:
265 |             raise error.InvalidAction(f'Length of action array must be same as n_agents({self.n_agents})')
266 |         if any(np.array(action) < 0):
267 |             raise error.InvalidAction(f"You can't order negative amount. You agents actions are: {action}")
268 | 
269 |         # concatenate previous states, self.prev_states in an queue of previous states
270 |         self.prev_states.popleft()
271 |         self.prev_states.append(self._get_observations())
272 |         # make incoming step
273 |         demand = self._get_demand()
274 |         orders_inc = [order.popleft() for order in self.orders]
275 |         self.next_incoming_orders = [demand] + orders_inc[:-1]
276 |         ship_inc = [shipment.popleft() for shipment in self.shipments]
277 |         # calculate shipments respecting orders and stock levels
278 |         for i in range(self.n_agents - 1):
279 |             max_possible_shipment = max(0, self.stocks[i + 1]) + ship_inc[i + 1]  # stock + incoming shipment
280 |             order = orders_inc[i] + max(0, -self.stocks[i + 1])  # incoming order + stockout
281 |             max_possible_shipment = min(order, max_possible_shipment)
282 |             self.shipments[i].append(max_possible_shipment)
283 |         self.shipments[-1].append(orders_inc[-1])
284 |         # update stocks
285 |         self.stocks = [(stock + inc) for stock, inc in zip(self.stocks, ship_inc)]
286 |         for i in range(1, self.n_agents):
287 |             self.stocks[i] -= orders_inc[i - 1]
288 |         self.stocks[0] -= demand
289 |         # update orders
290 |         for i in range(self.n_agents):
291 |             self.orders[i].append(action[i])
292 |         self.next_incoming_orders = [self._get_demand()] + [x[0] for x in self.orders[:-1]]
293 | 
294 |         # calculate costs
295 |         self.holding_cost = np.zeros(self.n_agents, dtype=np.float)
296 |         self.stockout_cost = np.zeros(self.n_agents, dtype=np.float)
297 |         for i in range(self.n_agents):
298 |             if self.stocks[i] >= 0:
299 |                 self.holding_cost[i] = self.stocks[i] * self.score_weight[0][i]
300 |             else:
301 |                 self.stockout_cost[i] = -self.stocks[i] * self.score_weight[1][i]
302 |         self.cum_holding_cost += self.holding_cost
303 |         self.cum_stockout_cost += self.stockout_cost
304 |         # calculate reward
305 |         rewards = self._get_rewards()
306 | 
307 |         # check if done
308 |         if self.turn == self.n_turns - 1:
309 |             self.done = True
310 |         else:
311 |             self.turn += 1
312 |         state = self._get_observations()
313 |         # todo flatten observation dict
314 |         return state, rewards, self.done, {}
315 | 
316 | 
if __name__ == '__main__':
    # Demo: play one classical 4-agent game with random integer orders in [0, 15],
    # printing the state after every turn.
    env = BeerGame(n_agents=4, env_type='classical')
    for agent_idx, agent_obs in enumerate(env.reset()):
        print(f'Agent {agent_idx} observation: {agent_obs}')
    env.render()
    finished = False
    while not finished:
        random_orders = np.random.uniform(0, 16, size=4).astype(int)
        _, _, finished, _ = env.step(random_orders)
        env.render()

    # you can also save and load environment via
    # canned_env = env._save()
    # env._load(canned_env)
333 | 


--------------------------------------------------------------------------------