├── .gitignore
├── Others
│   ├── agents
│   │   ├── __init__.py
│   │   ├── config_agents.py
│   │   ├── evaluation.py
│   │   ├── pos_cac_fo
│   │   │   ├── __init__.py
│   │   │   ├── agent.py
│   │   │   ├── dq_network.py
│   │   │   ├── replay_buffer.py
│   │   │   └── trainer.py
│   │   ├── replay_buffer.py
│   │   └── simple_agent.py
│   ├── config.py
│   ├── envs
│   │   ├── #environment.py#
│   │   ├── __init__.py
│   │   ├── config_env.py
│   │   ├── environment.py
│   │   ├── grid_core.py
│   │   ├── scenario.py
│   │   └── scenarios
│   │       ├── __init__.py
│   │       ├── endless.py
│   │       ├── pursuit.py
│   │       ├── single_agent.py
│   │       └── static_prey.py
│   ├── main.py
│   ├── make_env.py
│   ├── readme
│   ├── run_DQN2.sh
│   └── test.py
├── Predator-Prey
│   ├── agents
│   │   ├── __init__.py
│   │   ├── config_agents.py
│   │   ├── evaluation.py
│   │   ├── non_simple_agent.py
│   │   ├── pos_cac_fo
│   │   │   ├── __init__.py
│   │   │   ├── agent.py
│   │   │   ├── dq_network.py
│   │   │   ├── replay_buffer.py
│   │   │   └── trainer.py
│   │   ├── replay_buffer.py
│   │   └── simple_agent.py
│   ├── config.py
│   ├── envs
│   │   ├── __init__.py
│   │   ├── config_env.py
│   │   ├── environment.py
│   │   ├── grid_core.py
│   │   ├── gui
│   │   │   ├── __init__.py
│   │   │   ├── canvas.py
│   │   │   └── guiObjects.py
│   │   ├── scenario.py
│   │   └── scenarios
│   │       ├── __init__.py
│   │       ├── endless.py
│   │       ├── endless2.py
│   │       ├── endless3.py
│   │       ├── pursuit.py
│   │       ├── single_agent.py
│   │       └── static_prey.py
│   ├── main.py
│   ├── make_env.py
│   ├── readme
│   ├── run_DQN9.sh
│   └── test.py
└── README.md
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Others/agents/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | import logging 4 | 5 | 6 | def load(name): 7 | pathname = osp.join(osp.dirname(__file__), name) 8 | return imp.load_source('', pathname) 9 | 10 | 11 | logger_agent = logging.getLogger('Agent') 12 | logger_agent.setLevel(logging.INFO) 13 | fh_agent = logging.FileHandler('./agent.log') 14 | sh = logging.StreamHandler() 15 | fm = logging.Formatter('[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > [%(name)s] %(message)s') 16 | fh_agent.setFormatter(fm) 17 | sh.setFormatter(fm) 18 | logger_agent.addHandler(fh_agent) 19 | logger_agent.addHandler(sh) 20 | -------------------------------------------------------------------------------- /Others/agents/config_agents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | # import agents 4 | 5 | 6 | def config_agent(_flags): 7 | flags = _flags 8 | 9 | flags.DEFINE_string("agent", "cac_fo", "Agent") 10 | 11 | flags.DEFINE_integer("training_step", 10000, "Training time step") 12 | flags.DEFINE_integer("testing_step", 1, "Testing time step") 13 | flags.DEFINE_integer("max_step", 200, "Maximum time step per episode") 14 | flags.DEFINE_integer("eval_step", 100, "Number of steps before training") 15 | # flags.DEFINE_integer("training_step", 5000, "Training time step") 16 | # flags.DEFINE_integer("testing_step", 1000, "Testing time step") 17 | # flags.DEFINE_integer("max_step", 200, "Maximum time step per episode") 18 | # flags.DEFINE_integer("eval_step", 1000, "Number of steps before training") 19 | 20 | flags.DEFINE_integer("b_size", 50000, "Size of the replay memory") 21 | flags.DEFINE_integer("m_size", 64, "Minibatch size") 22 | flags.DEFINE_integer("pre_train_step", 10, "during [m_size * pre_step] take random action") 23 | flags.DEFINE_float("lr", 0.0005, "Learning rate") 24 | # flags.DEFINE_float("lr", 0.01, "Learning rate") # it is for single 25 | flags.DEFINE_float("df", 0.99, "Discount factor") 26 | 27 | flags.DEFINE_boolean("load_nn", False, "Load nn from file or not") 28 | flags.DEFINE_string("nn_file", "results/nn/s", "The name of file for loading") 29 | 30 | flags.DEFINE_boolean("train", True, "Training or testing") 
31 | flags.DEFINE_boolean("qtrace", False, "Use q trace") 32 | flags.DEFINE_boolean("kt", False, "Keyboard input test") 33 | flags.DEFINE_boolean("use_action_in_critic", False, "Use guided samples") 34 | flags.DEFINE_string("algorithm", "ddd", "algorithm") 35 | flags.DEFINE_string("epsilon", "Yes", "Use eps-greedy decreasing method (or other options can be added") 36 | 37 | 38 | 39 | 40 | def get_filename(): 41 | import config 42 | FLAGS = config.flags.FLAGS 43 | 44 | return "a-"+FLAGS.agent+"-lr-"+str(FLAGS.lr)+"-ms-"+str(FLAGS.m_size)+"-algorithm-"+str(FLAGS.algorithm) -------------------------------------------------------------------------------- /Others/agents/evaluation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | import numpy as np 5 | import logging 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | result = logging.getLogger('Result') 10 | 11 | class Evaluation(object): 12 | 13 | def __init__(self): 14 | self.episode_cnt = 0 15 | self.m = dict() 16 | 17 | def update_value(self, m_key, m_value, m_append=None): 18 | if m_key in self.m: 19 | self.m[m_key]['value'] += m_value 20 | self.m[m_key]['cnt'] += 1 21 | else: 22 | self.m[m_key] = dict() 23 | self.m[m_key]['value'] = m_value 24 | self.m[m_key]['cnt'] = 1 25 | if m_append is None: 26 | result.info(m_key + "\t" + str(m_value)) 27 | else: 28 | result.info(m_key + "\t" + str(m_value) + "\t" + str(m_append)) 29 | 30 | def summarize(self, key=None): 31 | if key is None: 32 | for k in self.m: 33 | print "Average", k, float(self.m[k]['value'])/self.m[k]['cnt'] 34 | result.info("summary\t" + k + "\t" + str(float(self.m[k]['value']) / self.m[k]['cnt'])) 35 | 36 | elif key not in self.m: 37 | print "Wrong key" 38 | 39 | else: 40 | print "Average", key, float(self.m[key]['value']) / self.m[key]['cnt'] 41 | result.info("summary\t" + key + "\t" + str(float(self.m[key]['value'])/self.m[key]['cnt'])) 42 | -------------------------------------------------------------------------------- /Others/agents/pos_cac_fo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sonkyunghwan/QTRAN/785c44ebc8379896dc9f513af2ac767d61013914/Others/agents/pos_cac_fo/__init__.py -------------------------------------------------------------------------------- /Others/agents/pos_cac_fo/agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | """ 5 | =========================================== 6 | :mod:`qlearn` Q-Learning 7 | =========================================== 8 | 9 | 설명 10 | ===== 11 | 12 | Choose action based on q-learning algorithm 13 | """ 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | import math 18 | from agents.pos_cac_fo.dq_network import * 19 | from agents.pos_cac_fo.replay_buffer import * 20 | from agents.evaluation import Evaluation 21 | import matplotlib 22 | matplotlib.use('Agg') 23 | import matplotlib.pyplot as plt 24 | import matplotlib.animation as animation 25 | 26 | import logging 27 | import config 28 | 29 | FLAGS = config.flags.FLAGS 30 | logger = logging.getLogger("Agent") 31 | result = logging.getLogger('Result') 32 | 33 | 34 | class Agent(object): 35 | 36 | def __init__(self, action_dim, obs_dim, name=""): 37 | logger.info("Centralized DQN Agent") 38 | 39 | 40 | self._obs_dim = obs_dim 41 | self._n_player = FLAGS.n_predator 42 | self._action_dim = action_dim * 
self._n_player 43 | self._action_dim_single = action_dim 44 | self._state_dim = obs_dim 45 | 46 | self._name = name 47 | self.update_cnt = 0 48 | self.target_update_period = 3000 49 | 50 | self.df = FLAGS.df 51 | self.lr = FLAGS.lr 52 | 53 | # Make Q-network 54 | tf.reset_default_graph() 55 | my_graph = tf.Graph() 56 | 57 | with my_graph.as_default(): 58 | self.sess = tf.Session(graph=my_graph, config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) 59 | self.q_network = DQNetwork(self.sess, self._state_dim, self._action_dim_single, self._n_player) 60 | self.sess.run(tf.global_variables_initializer()) 61 | self.saver = tf.train.Saver() 62 | if FLAGS.load_nn: 63 | print "LOAD!" 64 | self.saver.restore(self.sess, FLAGS.nn_file) 65 | 66 | self.replay_buffer = ReplayBuffer() 67 | 68 | self._eval = Evaluation() 69 | self.q_prev = None 70 | 71 | self.ims = [] 72 | plt.clf() 73 | self.fig = plt.figure() 74 | self.axes = plt.gca() 75 | plt.xticks(list(range(0,25,5))) 76 | plt.yticks(list(range(0,25,5))) 77 | self.axes.tick_params(axis='both',labelsize = 15) 78 | 79 | 80 | 81 | def act(self, state): 82 | state_i = state 83 | s = np.reshape(state_i, self._state_dim) 84 | 85 | action = self.q_network.get_action(s[None])[0] 86 | 87 | return action 88 | 89 | def train(self, state, action, reward, state_n, done): 90 | 91 | a = self.action_to_onehot(action) 92 | s = state 93 | s_n = state_n 94 | r = np.sum(reward) 95 | 96 | self.store_sample(s, a, r, s_n, done) 97 | self.update_network() 98 | return 0 99 | 100 | def store_sample(self, s, a, r, s_n, done): 101 | self.replay_buffer.add_to_memory((s, a, r, s_n, done)) 102 | return 0 103 | 104 | def update_network(self): 105 | self.update_cnt += 1 106 | if len(self.replay_buffer.replay_memory) < FLAGS.m_size * FLAGS.pre_train_step: 107 | return 0 108 | 109 | minibatch = self.replay_buffer.sample_from_memory() 110 | self.q_network.training_qnet(minibatch) 111 | 112 | if self.update_cnt % self.target_update_period == 0: 113 | self.q_network.training_target_qnet() 114 | if FLAGS.qtrace: 115 | if self.update_cnt % 10000 == 0: 116 | self.q_diff() 117 | 118 | if self.update_cnt % 10000 == 0: 119 | self.saver.save(self.sess, config.nn_filename, self.update_cnt) 120 | 121 | return 0 122 | 123 | def state_to_index(self, state): 124 | """ 125 | For the single agent case, the state is only related to the position of agent 1 126 | :param state: 127 | :return: 128 | """ 129 | ret = np.zeros(self._state_dim) 130 | for i in range(FLAGS.n_predator + FLAGS.n_prey): 131 | p = np.argwhere(np.array(state)==i+1)[0] 132 | ret[2 * i] = (p[0] - FLAGS.map_size /2.) / FLAGS.map_size 133 | ret[2 * i + 1] = (p[1] - FLAGS.map_size /2.) 
/ FLAGS.map_size 134 | 135 | 136 | return ret 137 | 138 | def get_predator_pos(self, state): 139 | """ 140 | return position of agent 1 and 2 141 | :param state: input is state 142 | :return: 143 | """ 144 | state_list = list(np.array(state).ravel()) 145 | return state_list.index(1), state_list.index(2) 146 | 147 | def get_pos_by_id(self, state, id): 148 | state_list = list(np.array(state).ravel()) 149 | return state_list.index(id) 150 | 151 | def onehot(self, index, size): 152 | n_hot = np.zeros(size) 153 | n_hot[index] = 1.0 154 | return n_hot 155 | 156 | def index_to_action(self, index): 157 | action_list = [] 158 | for i in range(FLAGS.n_predator-1): 159 | action_list.append(index%5) 160 | index = index/5 161 | action_list.append(index) 162 | return action_list 163 | 164 | def action_to_index(self, action): 165 | index = 0 166 | for i in range(FLAGS.n_predator): 167 | index += action[i] * 5 ** i 168 | return index 169 | 170 | def action_to_onehot(self, action): 171 | onehot = np.zeros([self._n_player, self._action_dim_single]) 172 | for i in range(self._n_player): 173 | onehot[i, action[i]] = 1 174 | return onehot 175 | 176 | def q_diff(self): 177 | 178 | 179 | print self.update_cnt 180 | 181 | def q(self, mode, step =0): 182 | 183 | if mode == 0: 184 | q_value = np.zeros([2,2]) 185 | for i in range(2): 186 | for j in range(2): 187 | s = np.array([10]) 188 | action = [i,j] 189 | a = self.action_to_onehot(action) 190 | q_value[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 191 | 192 | 193 | return q_value#, qi_value, p_value 194 | 195 | if mode == 1: 196 | q_value = np.zeros([3,3]) 197 | for i in range(3): 198 | for j in range(3): 199 | s = np.array([1]) 200 | action = [i,j] 201 | a = self.action_to_onehot(action) 202 | q_value[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 203 | 204 | qi_value = self.q_network.get_qp_values(s[None])[0] 205 | 206 | q_value2 = np.zeros([3,3]) 207 | for i in range(3): 208 | for j in range(3): 209 | s = np.array([2]) 210 | action = [i,j] 211 | a = self.action_to_onehot(action) 212 | q_value2[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 213 | 214 | qi_value2 = self.q_network.get_qp_values(s[None])[0] 215 | 216 | 217 | 218 | return q_value, qi_value, q_value2, qi_value2 219 | 220 | if mode == 2: 221 | q_value = np.zeros([11]) 222 | for i in range(11): 223 | s = np.ones(10) * 0.5 224 | a = np.zeros([10,2]) 225 | a[:10-i,0] = 1. 226 | a[10-i:,1] = 1. 
227 | q_value[i] = self.q_network.get_q_values(s[None],a[None])[0] 228 | 229 | return q_value 230 | 231 | 232 | elif mode == 3: 233 | q_value_1 = np.zeros([2,2]) 234 | q_value_2 = np.zeros([2,2]) 235 | q_value_3 = np.zeros([2,2]) 236 | for i in range(2): 237 | for j in range(2): 238 | s = np.array([0]) 239 | action = [i,j] 240 | a = self.action_to_onehot(action) 241 | q_value_1[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 242 | 243 | q_value_2 = np.zeros([2,2]) 244 | for i in range(2): 245 | for j in range(2): 246 | s = np.array([1]) 247 | action = [i,j] 248 | a = self.action_to_onehot(action) 249 | q_value_2[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 250 | 251 | q_value_3 = np.zeros([2,2]) 252 | for i in range(2): 253 | for j in range(2): 254 | s = np.array([2]) 255 | action = [i,j] 256 | a = self.action_to_onehot(action) 257 | q_value_3[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 258 | 259 | return q_value_1, q_value_2, q_value_3 260 | 261 | 262 | elif mode == 4: 263 | samples = 1000 264 | x = np.zeros(samples) 265 | y = np.zeros(samples) 266 | z = np.zeros(samples) 267 | for i in range (samples): 268 | act_n = [] 269 | for j in range(self._n_player): 270 | action = np.random.choice(self._action_dim_single) 271 | act_n.append(action) 272 | # act_n = np.array(list(bin(int(i))[2:].zfill(8)),dtype='int') 273 | a = self.action_to_onehot(act_n) 274 | s = np.ones(self._n_player) * 0.1 275 | x[i] = np.sum(np.array(act_n) * s) 276 | y[i] = self.q_network.get_q_values(s[None],a[None])[0] 277 | z[i] = self.q_network.get_qp_values(s[None],a[None])[0] 278 | 279 | order = np.argsort(x) 280 | xs = np.array(x)[order] 281 | ys = np.array(y)[order] 282 | zs = np.array(z)[order] 283 | 284 | np.save(config.file_name + "1", xs) 285 | np.save(config.file_name + "2", ys) 286 | np.save(config.file_name + "3", zs) 287 | 288 | plt.scatter(xs, ys) 289 | plt.xlim(0, 2.5) 290 | plt.ylim(0, 18) 291 | plt.xlabel('State-Action Fair') 292 | plt.ylabel('Q-value') 293 | plt.savefig(config.file_name + '-A1.png') 294 | 295 | plt.clf() 296 | 297 | plt.plot(xs, ys) 298 | plt.xlim(0, 2.5) 299 | plt.ylim(0, 18) 300 | plt.xlabel('State-Action Fair') 301 | plt.ylabel('Q-value') 302 | plt.savefig(config.file_name + '-A2.png') 303 | 304 | plt.clf() 305 | 306 | plt.scatter(xs, zs) 307 | plt.xlim(0, 2.5) 308 | plt.ylim(0, 18) 309 | plt.xlabel('State-Action Fair') 310 | plt.ylabel('Q-value') 311 | plt.savefig(config.file_name + '-B1.png') 312 | 313 | plt.clf() 314 | 315 | plt.plot(xs, zs) 316 | plt.xlim(0, 2.5) 317 | plt.ylim(0, 18) 318 | plt.xlabel('State-Action Fair') 319 | plt.ylabel('Q-value') 320 | plt.savefig(config.file_name + '-B2.png') 321 | 322 | return "FINISH" 323 | 324 | elif mode == 5: 325 | s = np.array([1]) 326 | Q_matrix = np.zeros([21,21]) 327 | Q_matrix2 = np.zeros([21,21]) 328 | for i in range(21): 329 | for j in range(21): 330 | act_n = np.array([i,j]) 331 | a = self.action_to_onehot(act_n) 332 | Q_matrix[i,j] = np.mean(self.q_network.get_q_values(s[None],a[None])) 333 | Q_matrix2[i,j] = np.mean(self.q_network.get_qp_values(s[None],a[None])) 334 | optimal_action = self.q_network.get_action(s[None])[0] 335 | 336 | ind = np.unravel_index(np.argmax(Q_matrix, axis=None), Q_matrix.shape) 337 | print 'optimal_action', optimal_action, np.mean(self.q_network.get_q_values(s[None],self.action_to_onehot(optimal_action)[None])) 338 | print 'ind', ind, self.q_network.get_q_values(s[None],self.action_to_onehot(ind)[None])[0] 339 | 340 | # plt.clf() 341 | # self.fig = plt.figure(figsize=(4,4)) 
342 | # self.ims = [] 343 | title = self.axes.text(0.5,1.05,"Step {}".format(step), 344 | size=plt.rcParams["axes.titlesize"], 345 | ha="center", transform=self.axes.transAxes, ) 346 | print "ADD!" 347 | self.ims.append([plt.pcolor(Q_matrix2,vmin=-10, vmax=10), title]) 348 | 349 | 350 | 351 | return Q_matrix, Q_matrix2 352 | 353 | elif mode == 6: 354 | im_ani = animation.ArtistAnimation(self.fig, self.ims, interval=200, #repeat_delay=3000, 355 | blit=False) 356 | im_ani.save(str(FLAGS.algorithm)+'.gif', dpi=80, writer='imagemagick') 357 | return True 358 | 359 | 360 | elif mode == 5: 361 | 362 | s = np.array([1] * FLAGS.n_predator) 363 | optimal_action = self.q_network.get_action(s[None])[0] 364 | r = np.sum(np.array(optimal_action))/10. 365 | 366 | x = np.linspace(0, 10, 1000) 367 | y = np.array([r * np.exp( -np.square(r-5)/1) + r * np.exp(-np.square(r-8)/0.25) for r in x]) 368 | # a = np.load("1-" + "pqmix5" + "-" + str((i+1)*1000)+".npy") 369 | x2 = np.sum(np.array(optimal_action))/10. 370 | y2 = x2 * np.exp( -np.square(x2-5)/1) + x2 * np.exp(-np.square(x2-8)/0.25) 371 | im = plt.plot(x,y,'black') 372 | im2 = plt.plot([x2],[y2],'ro') 373 | title = self.axes.text(0.5,1.05,"Step {}".format(step), 374 | size=plt.rcParams["axes.titlesize"], 375 | ha="center", transform=self.axes.transAxes, ) 376 | self.ims.append(im2) 377 | 378 | 379 | 380 | s = np.array([1]) 381 | Q_matrix = np.zeros([21,21]) 382 | Q_matrix2 = np.zeros([21,21]) 383 | for i in range(21): 384 | for j in range(21): 385 | act_n = np.array([i,j]) 386 | a = self.action_to_onehot(act_n) 387 | Q_matrix[i,j] = np.mean(self.q_network.get_q_values(s[None],a[None])) 388 | Q_matrix2[i,j] = np.mean(self.q_network.get_qp_values(s[None],a[None])) 389 | optimal_action = self.q_network.get_action(s[None])[0] 390 | 391 | ind = np.unravel_index(np.argmax(Q_matrix, axis=None), Q_matrix.shape) 392 | print 'optimal_action', optimal_action, np.mean(self.q_network.get_q_values(s[None],self.action_to_onehot(optimal_action)[None])) 393 | print 'ind', ind, self.q_network.get_q_values(s[None],self.action_to_onehot(ind)[None])[0] 394 | 395 | # plt.clf() 396 | # self.fig = plt.figure(figsize=(4,4)) 397 | # self.ims = [] 398 | title = self.axes.text(0.5,1.05,"Step {}".format(step), 399 | size=plt.rcParams["axes.titlesize"], 400 | ha="center", transform=self.axes.transAxes, ) 401 | print "ADD!" 
402 | self.ims.append([plt.pcolor(Q_matrix2,vmin=-10, vmax=10), title]) 403 | 404 | 405 | 406 | 407 | 408 | -------------------------------------------------------------------------------- /Others/agents/pos_cac_fo/replay_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | import logging 4 | import config 5 | from collections import deque 6 | import random 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | logger = logging.getLogger("Agent.replay") 11 | result = logging.getLogger('Result') 12 | 13 | 14 | class ReplayBuffer: 15 | def __init__(self): 16 | self.replay_memory_capacity = FLAGS.b_size # capacity of experience replay memory 17 | self.minibatch_size = FLAGS.m_size # size of minibatch from experience replay memory for updates 18 | self.replay_memory = deque(maxlen=self.replay_memory_capacity) 19 | 20 | def add_to_memory(self, experience): 21 | self.replay_memory.append(experience) 22 | 23 | def sample_from_memory(self): 24 | return random.sample(self.replay_memory, self.minibatch_size) 25 | 26 | def erase(self): 27 | self.replay_memory.popleft() 28 | -------------------------------------------------------------------------------- /Others/agents/pos_cac_fo/trainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | """ 5 | =========================================== 6 | :mod:`qlearn` Q-Learning 7 | =========================================== 8 | 9 | 10 | 설명 11 | ===== 12 | 13 | Choose action based on q-learning algorithm 14 | 15 | """ 16 | 17 | import numpy as np 18 | from agents.pos_cac_fo.agent import Agent 19 | from agents.simple_agent import RandomAgent as NonLearningAgent 20 | from agents.evaluation import Evaluation 21 | import logging 22 | import config 23 | 24 | FLAGS = config.flags.FLAGS 25 | logger = logging.getLogger("Agent") 26 | result = logging.getLogger('Result') 27 | 28 | training_step = FLAGS.training_step 29 | testing_step = FLAGS.testing_step 30 | 31 | if FLAGS.epsilon == "No": 32 | print "No epsilon decreasing" 33 | epsilon_dec = 0.0/training_step 34 | elif FLAGS.epsilon == "Yes": 35 | print "Epsilon decreasing" 36 | epsilon_dec = 2.0/training_step 37 | epsilon_min = 0.1 38 | 39 | 40 | class Trainer(object): 41 | 42 | def __init__(self, env): 43 | logger.info("Centralized DQN Trainer is created") 44 | 45 | self._env = env 46 | self._eval = Evaluation() 47 | self._n_predator = FLAGS.n_predator 48 | self._n_prey = FLAGS.n_prey 49 | self.action_dim = self._env.call_action_dim() 50 | self.state_dim = self._env.call_state_dim() 51 | 52 | self._agent = Agent(self.action_dim, self.state_dim) 53 | 54 | self.epsilon = 1.0 55 | 56 | def learn(self): 57 | 58 | step = 0 59 | episode = 0 60 | print_flag = False 61 | array = np.zeros([FLAGS.training_step/FLAGS.eval_step,4]) 62 | while step < training_step: 63 | episode += 1 64 | ep_step = 0 65 | obs = self._env.reset() 66 | state = obs 67 | total_reward = 0 68 | 69 | while True: 70 | step += 1 71 | ep_step += 1 72 | action = self.get_action(obs, step, state) 73 | obs_n, reward, done, info = self._env.step(action) 74 | state_n = obs_n 75 | 76 | done_single = sum(done) > 0 77 | if ep_step >= FLAGS.max_step : 78 | done_single = True 79 | self.train_agents(state, action, reward, state_n, done_single) 80 | 81 | obs = obs_n 82 | state = state_n 83 | total_reward += np.sum(reward) * (FLAGS.df ** (ep_step-1)) 84 | # if step % 100 ==0: 85 | # print step, self._agent.q() 86 
| if is_episode_done(done, step) or ep_step >= FLAGS.max_step : 87 | if print_flag: 88 | print "[train_ep %d]" % (episode),"\tstep:", step, "\tep_step:", ep_step, "\treward", total_reward 89 | break 90 | 91 | 92 | if episode % FLAGS.eval_step == 0: 93 | 94 | self.test(episode) 95 | 96 | 97 | self._eval.summarize() 98 | 99 | 100 | def get_action(self, obs, step, state, train=True): 101 | act_n = [] 102 | self.epsilon = max(self.epsilon - epsilon_dec, epsilon_min) 103 | 104 | # Action of predator 105 | action_list = self._agent.act(state) 106 | 107 | for i in range(self._n_predator): 108 | if train and (step < FLAGS.m_size * FLAGS.pre_train_step or np.random.rand() < self.epsilon): # with prob. epsilon 109 | action = np.random.choice(self.action_dim) 110 | act_n.append(action) 111 | else: 112 | act_n.append(action_list[i]) 113 | 114 | 115 | 116 | return np.array(act_n, dtype=np.int32) 117 | 118 | def train_agents(self, state, action, reward, state_n, done): 119 | self._agent.train(state, action, reward, state_n, done) 120 | 121 | def test(self, curr_ep=None): 122 | 123 | step = 0 124 | episode = 0 125 | 126 | test_flag = FLAGS.kt 127 | sum_reward = 0 128 | while step < testing_step: 129 | episode += 1 130 | obs = self._env.reset() 131 | state = obs 132 | if test_flag: 133 | print "\nInit\n", state 134 | total_reward = 0 135 | 136 | ep_step = 0 137 | 138 | while True: 139 | 140 | step += 1 141 | ep_step += 1 142 | 143 | action = self.get_action(obs, step, state, False) 144 | 145 | obs_n, reward, done, info = self._env.step(action) 146 | state_n = obs_n 147 | 148 | if test_flag: 149 | aa = raw_input('>') 150 | if aa == 'c': 151 | test_flag = False 152 | print action 153 | print state_n 154 | print reward 155 | 156 | obs = obs_n 157 | state = state_n 158 | total_reward += np.sum(reward) * (FLAGS.df ** (ep_step-1)) 159 | 160 | if is_episode_done(done, step, "test") or ep_step >= FLAGS.max_step: 161 | break 162 | sum_reward += total_reward 163 | 164 | print "Algorithm ", FLAGS.algorithm, ",Average reward: ", curr_ep, sum_reward /episode 165 | self._eval.update_value("test_result", sum_reward /episode, curr_ep) 166 | 167 | 168 | 169 | def is_episode_done(done, step, e_type="train"): 170 | 171 | if e_type == "test": 172 | if sum(done) > 0 or step >= FLAGS.testing_step: 173 | return True 174 | else: 175 | return False 176 | 177 | else: 178 | if sum(done) > 0 or step >= FLAGS.training_step: 179 | return True 180 | else: 181 | return False 182 | 183 | 184 | -------------------------------------------------------------------------------- /Others/agents/replay_buffer.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import random 3 | import config 4 | import numpy as np 5 | 6 | FLAGS = config.flags.FLAGS 7 | 8 | replay_memory_capacity = FLAGS.replay_buffer_capacity # capacity of experience replay memory 9 | minibatch_size = FLAGS.minibatch_size # size of minibatch from experience replay memory for updates 10 | trace_length = FLAGS.rnn_trace_len 11 | 12 | class ReplayBuffer: 13 | def __init__(self): 14 | self.replay_memory = deque(maxlen=replay_memory_capacity) 15 | 16 | def add_to_memory(self, experience): 17 | self.replay_memory.append(experience) 18 | 19 | def sample_from_memory(self): 20 | return random.sample(self.replay_memory, minibatch_size) 21 | 22 | class RNNReplayBuffer: 23 | def __init__(self): 24 | self.replay_memory = deque(maxlen=replay_memory_capacity) 25 | self.paddings = None 26 | 27 | def add_to_memory(self, 
experience): 28 | self.replay_memory.append(experience) 29 | 30 | if self.paddings == None: 31 | obs = np.zeros(experience[-1][0].shape) 32 | self.paddings = (obs, 0, 0, obs, True) 33 | 34 | def pad_trace(self, trace): 35 | trace.extend([self.paddings]*(trace_length-len(trace))) 36 | return trace 37 | 38 | def sample_from_memory(self): 39 | if len(self.replay_memory) < minibatch_size: 40 | n_points_per_ep = int(np.ceil(minibatch_size * 1./len(self.replay_memory))) 41 | sampled_episodes = self.replay_memory 42 | else: 43 | n_points_per_ep = 1 44 | sampled_episodes = random.sample(self.replay_memory, minibatch_size) 45 | 46 | sampledTraces = [] 47 | true_trace_length = np.ones(minibatch_size)*trace_length 48 | 49 | for i in range(n_points_per_ep): 50 | for j, episode in enumerate(sampled_episodes): 51 | if len(episode) < trace_length: 52 | true_trace_length[j] = len(episode) 53 | sampledTraces.append(self.pad_trace(episode)) # use the whole episode 54 | else: 55 | point = np.random.randint(0,len(episode) + 1 - trace_length) 56 | sampledTraces.append(episode[point:point + trace_length]) 57 | 58 | sampledTraces = np.array(sampledTraces[:minibatch_size]) # discard extra samples 59 | sampledTraces = np.reshape(sampledTraces,[minibatch_size*trace_length,-1]) 60 | return sampledTraces, true_trace_length 61 | -------------------------------------------------------------------------------- /Others/agents/simple_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RandomAgent(object): 4 | def __init__(self, action_dim): 5 | self._action_dim = action_dim 6 | 7 | def act(self, obs): 8 | return np.random.randint(self._action_dim) 9 | 10 | def train(self, minibatch, step): 11 | return 12 | 13 | class StaticAgent(object): 14 | def __init__(self, action): 15 | self._action = action 16 | 17 | def act(self, obs): 18 | return self._action 19 | 20 | def train(self, minibatch, step): 21 | return -------------------------------------------------------------------------------- /Others/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | import tensorflow as tf 5 | import logging 6 | import time 7 | import envs.config_env as config_env 8 | import agents.config_agents as config_agent 9 | 10 | flags = tf.flags 11 | 12 | flags.DEFINE_integer("seed", 0, "Random seed number") 13 | flags.DEFINE_string("folder", "default", "Result file folder name") 14 | 15 | config_env.config_env(flags) 16 | config_agent.config_agent(flags) 17 | 18 | # Make result file with given filename 19 | now = time.localtime() 20 | s_time = "%02d%02d%02d%02d%02d" % (now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec) 21 | file_name = str(flags.FLAGS.n_predator) + "-" 22 | file_name += config_env.get_filename() + "-" + config_agent.get_filename() 23 | file_name += "-seed-"+str(flags.FLAGS.seed)+"-" + s_time 24 | result = logging.getLogger('Result') 25 | result.setLevel(logging.INFO) 26 | 27 | if flags.FLAGS.folder == "default": 28 | result_fh = logging.FileHandler("./results/eval/r-" + file_name + ".txt") 29 | nn_filename = "./results/nn/n-" + file_name 30 | else: 31 | result_fh = logging.FileHandler("./results/eval/"+ flags.FLAGS.folder +"/r-" + file_name + ".txt") 32 | nn_filename = "./results/nn/" + flags.FLAGS.folder + "/n-" + file_name 33 | 34 | 35 | result_fm = logging.Formatter('[%(filename)s:%(lineno)s] %(asctime)s\t%(message)s') 36 | result_fh.setFormatter(result_fm) 
37 | result.addHandler(result_fh) 38 | 39 | # Used to map colors to integers 40 | COLOR_TO_IDX = { 41 | 'red' : 0, 42 | 'green' : 1, 43 | 'blue' : 2, 44 | 'purple': 3, 45 | 'yellow': 4, 46 | 'grey' : 5 47 | } 48 | 49 | IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys())) 50 | 51 | # Map of object type to integers 52 | OBJECT_TO_IDX = { 53 | 'empty' : 0, 54 | 'wall' : 1, 55 | 'agent' : 2, 56 | 'predator' : 3, 57 | 'prey' : 4 58 | } 59 | 60 | IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys())) -------------------------------------------------------------------------------- /Others/envs/#environment.py#: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | import numpy as np 4 | import config 5 | 6 | # environment for all agents in the multiagent world 7 | # currently code assumes that no agents will be created/destroyed at runtime! 8 | 9 | FLAGS = config.flags.FLAGS 10 | 11 | penalty = FLAGS.penalty 12 | agents = FLAGS.n_predator 13 | 14 | class MultiAgentEnv(gym.Env): 15 | metadata = { 16 | 'render.modes': ['human', 'rgb_array'] 17 | } 18 | 19 | def __init__(self, world, reset_callback=None, reward_callback=None, 20 | observation_callback=None, info_callback=None, 21 | done_callback=None, shared_viewer=True): 22 | 23 | self.world = world 24 | self.agents = self.world.agents 25 | # set required vectorized gym env property 26 | self.n = len(world.agents) 27 | # scenario callbacks 28 | self.reset_callback = reset_callback 29 | self.reward_callback = reward_callback 30 | self.observation_callback = observation_callback 31 | self.info_callback = info_callback 32 | self.done_callback = done_callback 33 | 34 | # environment parameters 35 | self.discrete_comm_space = True 36 | self.time = 0 37 | 38 | # configure spaces 39 | self.action_space = [] 40 | self.observation_space = [] 41 | self.agent_precedence = [] 42 | for agent in self.agents: 43 | self.agent_precedence.append(agent.itype) 44 | total_action_space = [] 45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 46 | total_action_space.append(u_action_space) 47 | 48 | # communication action space 49 | if self.discrete_comm_space: 50 | c_action_space = spaces.Discrete(world.dim_c) 51 | else: 52 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,)) 53 | 54 | if not agent.silent: 55 | total_action_space.append(c_action_space) 56 | 57 | # total action space 58 | if len(total_action_space) > 1: 59 | # all action spaces are discrete, so simplify to MultiDiscrete action space 60 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 61 | act_space = spaces.MultiDiscrete([act_space.n for act_space in total_action_space]) 62 | else: 63 | act_space = spaces.Tuple(total_action_space) 64 | self.action_space.append(act_space) 65 | else: 66 | self.action_space.append(total_action_space[0]) 67 | 68 | # observation space 69 | obs_dim = len(observation_callback(agent, self.world).flatten()) 70 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,))) 71 | agent.action.c = np.zeros(self.world.dim_c) 72 | 73 | def get_agent_profile(self): 74 | agent_profile = {} 75 | 76 | for i, agent in enumerate(self.agents): 77 | if agent.itype in agent_profile: 78 | agent_profile[agent.itype]['n_agent'] += 1 79 | agent_profile[agent.itype]['idx'].append(i) 80 | else: 81 | if isinstance(self.action_space[i], spaces.Discrete): 82 | act_space = self.action_space[i].n 83 | com_space = 
0 84 | else: 85 | act_space = self.action_space[i].nvec[0] 86 | com_space = self.action_space[i].nvec[1] 87 | 88 | agent_profile[agent.itype] = { 89 | 'n_agent': 1, 90 | 'idx': [i], 91 | 'act_dim': act_space, 92 | 'com_dim': com_space, 93 | 'obs_dim': self.observation_space[i].shape 94 | } 95 | 96 | return agent_profile 97 | 98 | def step(self, action_n): 99 | obs_n = [] 100 | reward_n = [] 101 | done_n = [] 102 | info_n = {'n': []} 103 | 104 | self.agents = self.world.agents 105 | self.world.step(action_n) 106 | 107 | for agent in self.agents: 108 | obs_n.append(self._get_obs(agent)) 109 | reward_n.append(self._get_reward(agent)) 110 | done_n.append(self._get_done(agent)) 111 | info_n['n'].append(self._get_info(agent)) 112 | 113 | return obs_n, reward_n, done_n, info_n 114 | 115 | def reset(self): 116 | # reset world 117 | self.reset_callback(self.world) 118 | 119 | obs_n = [] 120 | for agent in self.agents: 121 | obs_n.append(self._get_obs(agent)) 122 | return obs_n 123 | 124 | # get info used for benchmarking 125 | def _get_info(self, agent): 126 | if self.info_callback is None: 127 | return {} 128 | return self.info_callback(agent, self.world) 129 | 130 | # get observation for a particular agent 131 | def _get_obs(self, agent): 132 | if self.observation_callback is None: 133 | return np.zeros(0) 134 | return self.observation_callback(agent, self.world) 135 | 136 | # get dones for a particular agent 137 | # unused right now -- agents are allowed to go beyond the viewing screen 138 | def _get_done(self, agent): 139 | if self.done_callback is None: 140 | return False 141 | return self.done_callback(agent, self.world) 142 | 143 | # get reward for a particular agent 144 | def _get_reward(self, agent): 145 | if self.reward_callback is None: 146 | return 0.0 147 | return self.reward_callback(agent, self.world) 148 | 149 | def get_full_encoding(self): 150 | return self.world.get_full_encoding() 151 | 152 | class MultiAgentSimpleEnv1(gym.Env): 153 | def __init__(self, n_predator=1): 154 | 155 | # self.state = [0] 156 | self.action_dim = 2 157 | self.state_dim = 3 158 | 159 | self.state = np.array([1,0,0]) 160 | self.payoff_1 = np.array([[7.,7.],[7.,7.]]) 161 | # self.payoff_2 = np.array([[0.,1.],[1.,8.]]) 162 | self.payoff_2 = np.array([[0.,1.],[1.,8.]]) 163 | 164 | def reset(self): 165 | 166 | self.state = np.array([1,0,0]) 167 | 168 | return self.state 169 | 170 | def step(self, action): 171 | 172 | info = {'n': []} 173 | reward = [] 174 | done = [] 175 | 176 | if self.state[0] == 1: 177 | if action[0] == 0: 178 | self.state = np.array([0,1,0]) 179 | reward.append(0.) 180 | done.append(False) 181 | elif action[0] == 1: 182 | self.state = np.array([0,0,1]) 183 | reward.append(0.) 
184 | done.append(False) 185 | elif self.state[1] == 1: 186 | self.state = np.array([0,0,0]) 187 | reward.append(self.payoff_1[action[0],action[1]]) 188 | done.append(True) 189 | elif self.state[2] == 1: 190 | self.state = np.array([0,0,0]) 191 | reward.append(self.payoff_2[action[0],action[1]]) 192 | done.append(True) 193 | 194 | return self.state, reward, done, info 195 | 196 | def call_action_dim(self): 197 | return self.action_dim 198 | 199 | def call_state_dim(self): 200 | return self.state_dim 201 | 202 | 203 | class MultiAgentSimpleEnv2(gym.Env): 204 | def __init__(self, n_predator=1): 205 | 206 | self.state = [10] 207 | self.action_dim = 3 208 | self.state_dim = 1 209 | 210 | # self.payoff2 = np.array([[10.,3.,-10.],[2.,1.,-10.],[-10.,-10.,10.]]) 211 | # self.payoff2 = np.array([[10,-10,-10],[-10,1,2],[-10,3,5]]) 212 | # self.payoff2 = np.array([[10.,9.,5.],[9.,5.,1.],[5.,1.,0.]]) 213 | # self.payoff2 = np.array([[12.60697365, 7.6309042, 6.60896063], [ 7.62529612, 2.64922833, 1.62728453], [ 6.71724892, 1.74118018, 0.71923661]]) 214 | # self.payoff2 = np.array([[10,6,5],[6,4,2],[5,3,1]]) # Climbing game 215 | # self.payoff2 = np.array([[5,0,0],[6,7,-30],[0,-30,11]]) 216 | # self.payoff2 = np.array([[8,-penalty,-penalty],[-penalty,0,0],[-penalty,0,0]]) 217 | self.payoff2 = np.array([[10.0,0.,0.],[0.,2.,0.],[0.,0.,10.0]]) # Penalty game 218 | 219 | 220 | def reset(self): 221 | 222 | self.state = [10] 223 | 224 | return self.state 225 | 226 | def step(self, action): 227 | 228 | info = {'n': []} 229 | reward = [] 230 | done = [] 231 | reward.append(self.payoff2[action[0],action[1]]) 232 | self.state = [3] 233 | done.append(True) 234 | 235 | return self.state, reward, done, info 236 | 237 | def call_action_dim(self): 238 | return self.action_dim 239 | 240 | def call_state_dim(self): 241 | return self.state_dim 242 | 243 | class MultiAgentSimpleEnv3(gym.Env): 244 | def __init__(self, n_predator=1): 245 | 246 | self.state = [0] 247 | self.action_dim = 2 248 | self.state_dim = 1 249 | 250 | # self.payoff2 = np.array([[10.,3.,-10.],[2.,1.,-10.],[-10.,-10.,10.]]) 251 | # self.payoff2 = np.array([[10,-10,-10],[-10,1,2],[-10,3,5]]) 252 | # self.payoff2 = np.array([[10.,9.,8.],[9.,2.,1.],[8.,1.,0.]]) 253 | # self.payoff2 = np.array([[12.60697365, 7.6309042, 6.60896063], [ 7.62529612, 2.64922833, 1.62728453], [ 6.71724892, 1.74118018, 0.71923661]]) 254 | # self.payoff2 = np.array([[11,-30,0],[-30,7,6],[0,0,5]]) # Climbing game 255 | # self.payoff2 = np.array([[5,0,0],[6,7,-30],[0,-30,11]]) 256 | # self.payoff2 = np.array([[10.0,0.,-10.],[0.,2.,0.],[-10.,0.,10.]]) # Penalty game 257 | 258 | # self.payoff1 = np.array([[20,-10,0],[-10,2,6],[0,8,-10]]) 259 | # self.payoff2 = np.array([[-10,8,0],[6,2,-10],[0,-10,10]]) 260 | 261 | self.payoff1 = np.array([[6,7],[8,9]]) 262 | self.payoff2 = np.array([[0,1 - penalty],[1 - penalty,10]]) 263 | 264 | 265 | # if np.random.randint(2) == 0: 266 | # self.state = [1] 267 | # else: 268 | # self.state = [2] 269 | def reset(self): 270 | 271 | # if np.random.randint(2) == 0: 272 | # self.state = [1] 273 | # else: 274 | # self.state = [2] 275 | self.state = [0] 276 | 277 | return self.state 278 | 279 | def step(self, action): 280 | 281 | info = {'n': []} 282 | reward = [] 283 | done = [] 284 | if self.state[0] == 0: 285 | reward.append(0) 286 | if action[0] == 0: 287 | self.state = [1] 288 | else: 289 | self.state = [2] 290 | done.append(False) 291 | elif self.state[0] == 1: 292 | reward.append(self.payoff1[action[0],action[1]]) 293 | self.state = [3] 294 | 
done.append(True) 295 | else: 296 | reward.append(self.payoff2[action[0],action[1]]) 297 | self.state = [3] 298 | done.append(True) 299 | # self.state = [3] 300 | # done.append(True) 301 | # print self.state[0], action[0], action[1], reward 302 | return self.state, reward, done, info 303 | 304 | def call_action_dim(self): 305 | return self.action_dim 306 | 307 | def call_state_dim(self): 308 | return self.state_dim 309 | 310 | 311 | class MultiAgentSimpleEnv4(gym.Env): 312 | def __init__(self, n_predator=1): 313 | 314 | self.state = np.random.uniform(0,2,agents) 315 | # self.state = np.ones(agents) 316 | # self.state = self.state / np.sum(self.state) * 10 317 | # self.state = np.ones(agents) * 0.5 318 | self.action_dim = 11 319 | self.state_dim = agents 320 | 321 | 322 | 323 | def reset(self): 324 | 325 | self.state = np.random.uniform(0.0,1.0,agents) 326 | # self.state = np.ones(agents) 327 | # self.state = self.state / np.sum(self.state) * 10 328 | # self.state = self.state 329 | # self.state = np.ones(agents) * 0.5 330 | 331 | return self.state 332 | 333 | def step(self, action): 334 | 335 | info = {'n': []} 336 | reward = [] 337 | done = [] 338 | # if np.sum(np.array(action)) == 0: 339 | # reward.append(0) 340 | # if np.sum(np.array(action)) == 1: 341 | # np.sum(np.array(action) * self.state) 342 | # else: 343 | # r = (-1) * np.sum(np.array(action) * self.state) * penalty * 0.1 344 | # reward.append(r) 345 | r = np.sum(np.array(action) * self.state) 346 | # print np.array(action) 347 | # print self.state 348 | # print 'reward:', r 349 | th = 20.0 350 | th2 = 40.0 351 | th3 = 50.0 352 | # if r < -th: 353 | # reward.append(abs(r)) 354 | 355 | # if r < th2: 356 | # reward.append( -np.square(r-th) / 40. + 10) 357 | # # elif r < th2: 358 | # # reward.append( (2*th - r) / 2 ) 359 | 360 | # else: 361 | # reward.append( -np.square(r-th3) / 5. 
+ 20) 362 | # 10*e^(-(x-30)^2/100) + 20 *e^(-(x-90)^2/400) 363 | rv = 10 * np.exp( -np.square(r-25) / 50) + 15 \ 364 | * np.exp( -np.square(r-40)/10) 365 | # print r 366 | # print rv 367 | # 10*e^(-(x-35)^2/100) + 15 *e^(-(x-80)^2/100) 368 | reward.append(rv) 369 | 370 | # total_reward = 0 371 | # th = 1.0 372 | # for i in range(self.action_dim-1): 373 | # idx = np.where(np.array(action) == i+1, 1.0, 0.0) 374 | # r = np.sum(idx * self.state) 375 | # if r > th: 376 | # total_reward += th - (r-th) * penalty 377 | # else: 378 | # total_reward += r 379 | # reward.append(total_reward) 380 | 381 | # r = abs(np.count_nonzero(action) - (agents - np.count_nonzero(action))) 382 | # if r == 0: 383 | # reward.append(10) 384 | # else: 385 | # reward.append( (r-10) * penalty) 386 | # self.state = np.random.uniform(0,1,agents) 387 | # self.state = np.ones(10) * 0.5 388 | done.append(True) 389 | 390 | return self.state, reward, done, info 391 | 392 | def call_action_dim(self): 393 | return self.action_dim 394 | 395 | def call_state_dim(self): 396 | return self.state_dim 397 | -------------------------------------------------------------------------------- /Others/envs/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | # from gym.envs.registration import register 3 | 4 | # Multiagent envs 5 | # ---------------------------------------- 6 | 7 | # register( 8 | # id='MultiagentSimple-v0', 9 | # entry_point='multiagent.envs:SimpleEnv', 10 | # # FIXME(cathywu) currently has to be exactly max_path_length parameters in 11 | # # rllab run script 12 | # max_episode_steps=100, 13 | # ) 14 | 15 | # register( 16 | # id='MultiagentSimpleSpeakerListener-v0', 17 | # entry_point='multiagent.envs:SimpleSpeakerListenerEnv', 18 | # max_episode_steps=100, 19 | # ) 20 | 21 | logger_agent = logging.getLogger('GridMARL') 22 | logger_agent.setLevel(logging.INFO) 23 | 24 | fm = logging.Formatter('[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > [%(name)s] %(message)s') 25 | sh = logging.StreamHandler() 26 | sh.setFormatter(fm) 27 | logger_agent.addHandler(sh) 28 | 29 | # fh_agent = logging.FileHandler('./agent.log') 30 | # fh_agent.setFormatter(fm) 31 | # logger_agent.addHandler(fh_agent) 32 | -------------------------------------------------------------------------------- /Others/envs/config_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | def config_env(_flags): 5 | flags = _flags 6 | 7 | # Scenario 8 | flags.DEFINE_string("scenario", "pursuit", "Scenario") 9 | flags.DEFINE_integer("n_predator", 2, "Number of predators") 10 | flags.DEFINE_integer("n_prey", 1, "Number of preys") 11 | 12 | # Observation 13 | flags.DEFINE_integer("history_len", 1, "How many previous steps we look back") 14 | 15 | # core 16 | flags.DEFINE_integer("map_size", 3, "Size of the map") 17 | flags.DEFINE_float("render_every", 1000, "Render the nth episode") 18 | flags.DEFINE_integer("penalty", 10, "penalty") 19 | 20 | def get_filename(): 21 | import config 22 | FLAGS = config.flags.FLAGS 23 | 24 | return "s-"+FLAGS.scenario+"-map-"+str(FLAGS.map_size) +"-penalty-"+str(FLAGS.penalty) -------------------------------------------------------------------------------- /Others/envs/environment.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym 
import spaces 3 | import numpy as np 4 | import config 5 | 6 | # environment for all agents in the multiagent world 7 | # currently code assumes that no agents will be created/destroyed at runtime! 8 | 9 | FLAGS = config.flags.FLAGS 10 | 11 | penalty = FLAGS.penalty 12 | agents = FLAGS.n_predator 13 | 14 | class MultiAgentEnv(gym.Env): 15 | metadata = { 16 | 'render.modes': ['human', 'rgb_array'] 17 | } 18 | 19 | def __init__(self, world, reset_callback=None, reward_callback=None, 20 | observation_callback=None, info_callback=None, 21 | done_callback=None, shared_viewer=True): 22 | 23 | self.world = world 24 | self.agents = self.world.agents 25 | # set required vectorized gym env property 26 | self.n = len(world.agents) 27 | # scenario callbacks 28 | self.reset_callback = reset_callback 29 | self.reward_callback = reward_callback 30 | self.observation_callback = observation_callback 31 | self.info_callback = info_callback 32 | self.done_callback = done_callback 33 | 34 | # environment parameters 35 | self.discrete_comm_space = True 36 | self.time = 0 37 | 38 | # configure spaces 39 | self.action_space = [] 40 | self.observation_space = [] 41 | self.agent_precedence = [] 42 | for agent in self.agents: 43 | self.agent_precedence.append(agent.itype) 44 | total_action_space = [] 45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 46 | total_action_space.append(u_action_space) 47 | 48 | # communication action space 49 | if self.discrete_comm_space: 50 | c_action_space = spaces.Discrete(world.dim_c) 51 | else: 52 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,)) 53 | 54 | if not agent.silent: 55 | total_action_space.append(c_action_space) 56 | 57 | # total action space 58 | if len(total_action_space) > 1: 59 | # all action spaces are discrete, so simplify to MultiDiscrete action space 60 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 61 | act_space = spaces.MultiDiscrete([act_space.n for act_space in total_action_space]) 62 | else: 63 | act_space = spaces.Tuple(total_action_space) 64 | self.action_space.append(act_space) 65 | else: 66 | self.action_space.append(total_action_space[0]) 67 | 68 | # observation space 69 | obs_dim = len(observation_callback(agent, self.world).flatten()) 70 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,))) 71 | agent.action.c = np.zeros(self.world.dim_c) 72 | 73 | def get_agent_profile(self): 74 | agent_profile = {} 75 | 76 | for i, agent in enumerate(self.agents): 77 | if agent.itype in agent_profile: 78 | agent_profile[agent.itype]['n_agent'] += 1 79 | agent_profile[agent.itype]['idx'].append(i) 80 | else: 81 | if isinstance(self.action_space[i], spaces.Discrete): 82 | act_space = self.action_space[i].n 83 | com_space = 0 84 | else: 85 | act_space = self.action_space[i].nvec[0] 86 | com_space = self.action_space[i].nvec[1] 87 | 88 | agent_profile[agent.itype] = { 89 | 'n_agent': 1, 90 | 'idx': [i], 91 | 'act_dim': act_space, 92 | 'com_dim': com_space, 93 | 'obs_dim': self.observation_space[i].shape 94 | } 95 | 96 | return agent_profile 97 | 98 | def step(self, action_n): 99 | obs_n = [] 100 | reward_n = [] 101 | done_n = [] 102 | info_n = {'n': []} 103 | 104 | self.agents = self.world.agents 105 | self.world.step(action_n) 106 | 107 | for agent in self.agents: 108 | obs_n.append(self._get_obs(agent)) 109 | reward_n.append(self._get_reward(agent)) 110 | done_n.append(self._get_done(agent)) 111 | info_n['n'].append(self._get_info(agent)) 112 | 113 | return 
obs_n, reward_n, done_n, info_n 114 | 115 | def reset(self): 116 | # reset world 117 | self.reset_callback(self.world) 118 | 119 | obs_n = [] 120 | for agent in self.agents: 121 | obs_n.append(self._get_obs(agent)) 122 | return obs_n 123 | 124 | # get info used for benchmarking 125 | def _get_info(self, agent): 126 | if self.info_callback is None: 127 | return {} 128 | return self.info_callback(agent, self.world) 129 | 130 | # get observation for a particular agent 131 | def _get_obs(self, agent): 132 | if self.observation_callback is None: 133 | return np.zeros(0) 134 | return self.observation_callback(agent, self.world) 135 | 136 | # get dones for a particular agent 137 | # unused right now -- agents are allowed to go beyond the viewing screen 138 | def _get_done(self, agent): 139 | if self.done_callback is None: 140 | return False 141 | return self.done_callback(agent, self.world) 142 | 143 | # get reward for a particular agent 144 | def _get_reward(self, agent): 145 | if self.reward_callback is None: 146 | return 0.0 147 | return self.reward_callback(agent, self.world) 148 | 149 | def get_full_encoding(self): 150 | return self.world.get_full_encoding() 151 | 152 | class MultiAgentSimpleEnv1(gym.Env): 153 | def __init__(self, n_predator=1): 154 | 155 | # self.state = [0] 156 | self.action_dim = 2 157 | self.state_dim = 3 158 | 159 | self.state = np.array([1,0,0]) 160 | self.payoff_1 = np.array([[7.,7.],[7.,7.]]) 161 | # self.payoff_2 = np.array([[0.,1.],[1.,8.]]) 162 | self.payoff_2 = np.array([[0.,1.],[1.,8.]]) 163 | 164 | def reset(self): 165 | 166 | self.state = np.array([1,0,0]) 167 | 168 | return self.state 169 | 170 | def step(self, action): 171 | 172 | info = {'n': []} 173 | reward = [] 174 | done = [] 175 | 176 | if self.state[0] == 1: 177 | if action[0] == 0: 178 | self.state = np.array([0,1,0]) 179 | reward.append(0.) 180 | done.append(False) 181 | elif action[0] == 1: 182 | self.state = np.array([0,0,1]) 183 | reward.append(0.) 
184 | done.append(False) 185 | elif self.state[1] == 1: 186 | self.state = np.array([0,0,0]) 187 | reward.append(self.payoff_1[action[0],action[1]]) 188 | done.append(True) 189 | elif self.state[2] == 1: 190 | self.state = np.array([0,0,0]) 191 | reward.append(self.payoff_2[action[0],action[1]]) 192 | done.append(True) 193 | 194 | return self.state, reward, done, info 195 | 196 | def call_action_dim(self): 197 | return self.action_dim 198 | 199 | def call_state_dim(self): 200 | return self.state_dim 201 | 202 | 203 | class MultiAgentSimpleEnv2(gym.Env): #Matrix game 204 | def __init__(self, n_predator=1): 205 | 206 | self.state = [1] 207 | self.action_dim = 3 208 | self.state_dim = 1 209 | 210 | self.payoff2 = np.array([[8.,-12.,-12.],[-12.,0.,0.],[-12.,0.,0.]]) 211 | 212 | 213 | 214 | def reset(self): 215 | 216 | self.state = [1] 217 | 218 | return self.state 219 | 220 | def step(self, action): 221 | 222 | info = {'n': []} 223 | reward = [] 224 | done = [] 225 | reward.append(self.payoff2[action[0],action[1]]) 226 | self.state = [3] 227 | done.append(True) 228 | 229 | return self.state, reward, done, info 230 | 231 | def call_action_dim(self): 232 | return self.action_dim 233 | 234 | def call_state_dim(self): 235 | return self.state_dim 236 | 237 | class MultiAgentSimpleEnv3(gym.Env): #Partial observation 238 | def __init__(self, n_predator=1): 239 | 240 | self.state = [0] 241 | self.action_dim = 3 242 | self.state_dim = 1 243 | 244 | 245 | 246 | self.payoff1 = np.array([[10,8,5],[8,6,3],[5,3,0]]) 247 | self.payoff2 = np.array([[0,3,5],[3,6,8],[5,8,10]]) 248 | 249 | # self.payoff1 = np.array([[6,7],[8,9]]) 250 | # self.payoff2 = np.array([[0,1 - penalty],[1 - penalty,10]]) 251 | 252 | 253 | if np.random.randint(2) == 0: 254 | self.state = [1] 255 | else: 256 | self.state = [2] 257 | def reset(self): 258 | 259 | if np.random.randint(2) == 0: 260 | self.state = [1] 261 | else: 262 | self.state = [2] 263 | # self.state = [0] 264 | 265 | return self.state 266 | 267 | def step(self, action): 268 | 269 | info = {'n': []} 270 | reward = [] 271 | done = [] 272 | 273 | if self.state[0] == 1: 274 | reward.append(self.payoff1[action[0],action[1]]) 275 | self.state = [3] 276 | done.append(True) 277 | else: 278 | reward.append(self.payoff2[action[0],action[1]]) 279 | self.state = [3] 280 | done.append(True) 281 | # self.state = [3] 282 | # done.append(True) 283 | # print self.state[0], action[0], action[1], reward 284 | return self.state, reward, done, info 285 | 286 | def call_action_dim(self): 287 | return self.action_dim 288 | 289 | def call_state_dim(self): 290 | return self.state_dim 291 | 292 | 293 | class MultiAgentSimpleEnv4(gym.Env): #Gaussian Squeeze 294 | def __init__(self, n_predator=1): 295 | 296 | self.state = np.random.uniform(0.,2.,agents) 297 | 298 | self.action_dim = 11 299 | 300 | self.state_dim = agents 301 | 302 | 303 | 304 | def reset(self): 305 | 306 | self.state = np.random.uniform(0.,2.,agents) 307 | 308 | 309 | return self.state 310 | 311 | def step(self, action): 312 | 313 | info = {'n': []} 314 | reward = [] 315 | done = [] 316 | 317 | r = np.sum(np.array(action) * self.state)/agents 318 | 319 | 320 | if penalty == 1: 321 | rv = r * np.exp( -np.square(r-5) / 1) + r * np.exp( -np.square(r-8) / 0.25) 322 | else: 323 | rv = r * np.exp( -np.square(r-8) / 0.25) 324 | 325 | 326 | reward.append(rv) 327 | 328 | done.append(True) 329 | 330 | return self.state, reward, done, info 331 | 332 | def call_action_dim(self): 333 | return self.action_dim 334 | 335 | def call_state_dim(self): 
336 | return self.state_dim 337 | -------------------------------------------------------------------------------- /Others/envs/grid_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import config 3 | 4 | 5 | COLOR_TO_IDX = config.COLOR_TO_IDX 6 | OBJECT_TO_IDX = config.OBJECT_TO_IDX 7 | 8 | N = 0 9 | E = 1 10 | O = 2 11 | W = 3 12 | S = 4 13 | 14 | # action of the agent 15 | class Action(object): 16 | def __init__(self): 17 | # physical action 18 | self.u = None 19 | # communication action 20 | self.c = None 21 | 22 | # properties and state of physical world entity 23 | class Entity(object): 24 | def __init__(self, itype, color): 25 | assert itype in OBJECT_TO_IDX, itype 26 | assert color in COLOR_TO_IDX, color 27 | self.itype = itype 28 | self.color = color 29 | self.contains = None 30 | 31 | # name 32 | self.name = '' 33 | # properties: 34 | self.movable = False 35 | # entity collides with others 36 | self.collide = True 37 | # material density (affects mass) 38 | self.density = 25.0 39 | 40 | @property 41 | def pos(self): 42 | return self._x, self._y 43 | 44 | def set_pos(self, x, y): 45 | self._x = x 46 | self._y = y 47 | 48 | # properties of agent entities 49 | class CoreAgent(Entity): 50 | def __init__(self, itype='agent', color='green'): 51 | super(CoreAgent, self).__init__(itype, color) 52 | self.name = "" 53 | # agents are movable by default 54 | self.movable = True 55 | # cannot send communication signals 56 | self.silent = True 57 | # action 58 | self.action = Action() 59 | # if waiting for other agents action 60 | self.waiting = False 61 | # if done doing its action in the current step 62 | self.done_moving = False 63 | # if the intended step collided 64 | self.collided = False 65 | 66 | self._obs = None 67 | self._x = 0 68 | self._y = 0 69 | self.obs_range = 1 70 | 71 | def update_obs(self, obs): 72 | self._obs = obs 73 | 74 | def get_obs(self): 75 | return self._obs 76 | 77 | class Wall(Entity): 78 | def __init__(self, color='grey'): 79 | super(Wall, self).__init__('wall', color) 80 | 81 | class Grid(object): 82 | """ 83 | Represent a grid and operations on it 84 | """ 85 | 86 | def __init__(self, width, height): 87 | assert width >= 2 88 | assert height >= 2 89 | 90 | self.width = width 91 | self.height = height 92 | self.reset() 93 | 94 | def set(self, i, j, v): 95 | assert i >= 0 and i < self.width 96 | assert j >= 0 and j < self.height 97 | self.grid[j * self.width + i] = v 98 | 99 | def get(self, i, j): 100 | if ((i >= 0 and i < self.width) and \ 101 | (j >= 0 and j < self.height)): 102 | return self.grid[j * self.width + i] 103 | 104 | return Wall() 105 | 106 | def reset(self): 107 | self.grid = [None] * self.width * self.height 108 | 109 | def setHorzWall(self, x, y, length=None): 110 | if length is None: 111 | length = self.width - x 112 | for i in range(0, length): 113 | self.set(x + i, y, Wall()) 114 | 115 | def setVertWall(self, x, y, length=None): 116 | if length is None: 117 | length = self.height - y 118 | for j in range(0, length): 119 | self.set(x, y + j, Wall()) 120 | 121 | def wallRect(self, x, y, w, h): 122 | self.setHorzWall(x, y, w) 123 | self.setHorzWall(x, y+h-1, w) 124 | self.setVertWall(x, y, h) 125 | self.setVertWall(x+w-1, y, h) 126 | 127 | def slice(self, topX, topY, width, height): 128 | """ 129 | Get a subset of the grid 130 | """ 131 | 132 | grid = Grid(width, height) 133 | 134 | for j in range(0, height): 135 | for i in range(0, width): 136 | x = topX + i 137 | y = topY + j 138 
| 139 | if x >= 0 and x < self.width and \ 140 | y >= 0 and y < self.height: 141 | v = self.get(x, y) 142 | else: 143 | v = Wall() 144 | 145 | grid.set(i, j, v) 146 | 147 | return grid 148 | 149 | def encode(self): 150 | """ 151 | Produce a compact numpy encoding of the grid 152 | """ 153 | 154 | array = np.zeros(shape=(self.height, self.width, 3), dtype=np.int8) 155 | 156 | for j in range(0, self.height): 157 | for i in range(0, self.width): 158 | 159 | v = self.get(i, j) 160 | if isinstance(v, CoreAgent): 161 | array[j, i, 2] = v.id 162 | 163 | if v == None: 164 | continue 165 | 166 | 167 | array[j, i, 0] = OBJECT_TO_IDX[v.itype] 168 | array[j, i, 1] = COLOR_TO_IDX[v.color] 169 | 170 | return array 171 | 172 | # multi-agent world 173 | class World(object): 174 | def __init__(self, width, height): 175 | # list of agents and entities (can change at execution-time!) 176 | self.agents = [] 177 | 178 | # communication channel dimensionality 179 | self.dim_c = 0 180 | # position dimensionality 181 | self.dim_p = 2 182 | 183 | self.width = width 184 | self.height = height 185 | 186 | self.grid = Grid(self.width, self.height) 187 | self.grid.wallRect(0, 0, self.width, self.height) 188 | 189 | self.step_cnt = 0 190 | 191 | def empty_grid(self): 192 | self.step_cnt = 0 193 | self.grid.reset() 194 | 195 | def placeObj(self, obj, top=None, size=None, reject_fn=None): 196 | """ 197 | Place an object at an empty position in the grid 198 | 199 | :param top: top-left position of the rectangle where to place 200 | :param size: size of the rectangle where to randomly place 201 | :param reject_fn: function to filter out potential positions 202 | """ 203 | 204 | if top is None: 205 | top = (0, 0) 206 | 207 | if size is None: 208 | size = (self.grid.width, self.grid.height) 209 | 210 | while True: 211 | pos = ( 212 | np.random.randint(top[0], top[0] + size[0]), 213 | np.random.randint(top[1], top[1] + size[1]) 214 | ) 215 | 216 | # Don't place the object on top of another object 217 | if self.grid.get(*pos) != None: 218 | continue 219 | 220 | # Check if there is a filtering criterion 221 | if reject_fn and reject_fn(self, pos): 222 | continue 223 | 224 | break 225 | 226 | self.grid.set(pos[0], pos[1], obj) 227 | obj.set_pos(pos[0], pos[1]) 228 | return pos 229 | 230 | def resetObj(self, obj, top=None, size=None, reject_fn=None): 231 | """ 232 | Reset an object at an empty position in the grid 233 | 234 | :param top: top-left position of the rectangle where to place 235 | :param size: size of the rectangle where to randomly place 236 | :param reject_fn: function to filter out potential positions 237 | """ 238 | if top is None: 239 | top = (0, 0) 240 | 241 | if size is None: 242 | size = (self.grid.width, self.grid.height) 243 | 244 | while True: 245 | pos = ( 246 | np.random.randint(top[0], top[0] + size[0]), 247 | np.random.randint(top[1], top[1] + size[1]) 248 | ) 249 | 250 | # Don't place the object on top of another object 251 | if self.grid.get(*pos) != None: 252 | continue 253 | 254 | # Check if there is a filtering criterion 255 | if reject_fn and reject_fn(self, pos): 256 | continue 257 | 258 | break 259 | x, y = obj.pos 260 | self.grid.set(x, y, None) 261 | self.grid.set(pos[0], pos[1], obj) 262 | obj.set_pos(pos[0], pos[1]) 263 | return pos 264 | 265 | def single_agent_step(self, agent, action): 266 | if agent.done_moving or agent.waiting: 267 | return 268 | 269 | x, y = agent.pos 270 | action = agent.action.u 271 | 272 | if action == N: 273 | y -= 1 274 | elif action == E: 275 | x -= 1 276 | elif 
action == W: 277 | x += 1 278 | elif action == S: 279 | y += 1 280 | elif action == O: 281 | agent.done_moving = True 282 | agent.collided = False 283 | return 284 | 285 | intended_cell = self.grid.get(x, y) 286 | if isinstance(intended_cell, CoreAgent): 287 | agent.waiting = True 288 | # let the other agent move first 289 | self.single_agent_step(intended_cell, intended_cell.action.u) 290 | agent.waiting = False 291 | # get the intended cell (to check if it is empty) 292 | intended_cell = self.grid.get(x, y) 293 | 294 | # check if the intended cell is empty 295 | if not intended_cell is None: 296 | agent.collided = True 297 | else: 298 | x_0, y_0 = agent.pos 299 | self.grid.set(x_0, y_0, None) 300 | self.grid.set(x, y, agent) 301 | agent.set_pos(x, y) 302 | agent.collided = False 303 | 304 | agent.done_moving = True 305 | 306 | # update state of the world 307 | def step(self, action_n): 308 | self.step_cnt += 1 309 | # set the action 310 | for i, agent in enumerate(self.agents): 311 | agent.action.u = action_n[i] 312 | agent.done_moving = False 313 | 314 | # do the action 315 | for agent in self.agents: 316 | self.single_agent_step(agent, agent.action.u) 317 | 318 | # update observations of all agents 319 | self.set_observations() 320 | 321 | def set_observations(self): 322 | for agent in self.agents: 323 | x, y = agent.pos 324 | r = agent.obs_range 325 | obs = self.grid.slice(x-r, y-r,r*2+1,r*2+1) 326 | agent.update_obs(obs.encode()) 327 | 328 | def get_full_encoding(self): 329 | return self.grid.encode() -------------------------------------------------------------------------------- /Others/envs/scenario.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # defines scenario upon which the world is built 4 | class BaseScenario(object): 5 | # create elements of the world 6 | def make_world(self): 7 | raise NotImplementedError() 8 | # create initial conditions of the world 9 | def reset_world(self, world): 10 | raise NotImplementedError() 11 | -------------------------------------------------------------------------------- /Others/envs/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | 4 | 5 | def load(name): 6 | pathname = osp.join(osp.dirname(__file__), name) 7 | return imp.load_source('', pathname) 8 | -------------------------------------------------------------------------------- /Others/envs/scenarios/endless.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[0,1,0], 19 | [1,0,1], 20 | [0,1,0]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | def can_observe_predator(self): 27 | shape = np.shape(self._obs[:,:,0]) 28 | obs_size = shape[0]*shape[1] 29 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 30 | ret = np.shape(np.where(obs == 3))[1] > 0 31 | return ret 32 | 33 | def 
can_observe_two_predator(self): 34 | shape = np.shape(self._obs[:,:,0]) 35 | obs_size = shape[0]*shape[1] 36 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 37 | ret = np.shape(np.where(obs == 3))[1] > 1 38 | return ret 39 | 40 | class Predator(Agent): 41 | def __init__(self): 42 | super(Predator, self).__init__("predator", "blue") 43 | self._obs = deque(maxlen=FLAGS.history_len) 44 | self.obs_range = 1 45 | 46 | def can_observe_prey(self): 47 | shape = np.shape(self._obs) 48 | obs_size = shape[1]*shape[2] 49 | obs = np.reshape(self._obs, obs_size) 50 | ret = np.shape(np.where(obs == 4))[1] > 0 51 | return ret 52 | 53 | def update_obs(self, obs): 54 | self._obs.append(obs[:,:,0]) # use only the first channel 55 | 56 | def fill_obs(self): 57 | # fill the whole history with the current observation 58 | for i in range(FLAGS.history_len-1): 59 | self._obs.append(self._obs[-1]) 60 | 61 | class Scenario(BaseScenario): 62 | def __init__(self): 63 | self.prey_captured = False 64 | 65 | def make_world(self): 66 | world = World(width=map_size, height=map_size) 67 | 68 | agents = [] 69 | self.atype_to_idx = { 70 | "predator": [], 71 | "prey": [] 72 | } 73 | 74 | # add predators 75 | for i in xrange(n_predator): 76 | agents.append(Predator()) 77 | self.atype_to_idx["predator"].append(i) 78 | 79 | # add preys 80 | for i in xrange(n_prey): 81 | agents.append(Prey()) 82 | self.atype_to_idx["prey"].append(n_predator + i) 83 | 84 | world.agents = agents 85 | for i, agent in enumerate(world.agents): 86 | agent.id = i + 1 87 | agent.silent = True 88 | 89 | # make initial conditions 90 | self.reset_world(world) 91 | return world 92 | 93 | def reset_world(self, world): 94 | world.empty_grid() 95 | 96 | # randomly place agent 97 | for agent in world.agents: 98 | world.placeObj(agent) 99 | 100 | world.set_observations() 101 | 102 | # fill the history with current observation 103 | for i in self.atype_to_idx["predator"]: 104 | world.agents[i].fill_obs() 105 | 106 | self.prey_captured = False 107 | 108 | def reward(self, agent, world): 109 | if agent.itype == "predator": 110 | # if self.prey_captured: 111 | # # return max(10 - world.step_cnt, 0) 112 | # return 1 113 | # else: 114 | # reward = -0.01 115 | # for i in self.atype_to_idx["prey"]: 116 | # prey = world.agents[i] 117 | # if prey.cannot_move(): 118 | # reward = 1 119 | # world.resetObj(prey) 120 | # return reward 121 | # # kdw - Use this for large map size 122 | # # if agent.can_observe_prey(): 123 | # # reward = 0.0 124 | # return reward 125 | reward = -0.001 126 | 127 | for i in self.atype_to_idx["prey"]: 128 | prey = world.agents[i] 129 | if prey.can_observe_predator(): 130 | #world.resetObj(prey) 131 | reward += 0.1 132 | return reward 133 | 134 | else: # if prey 135 | if agent.cannot_move(): 136 | return -1 137 | 138 | return 0 139 | 140 | def observation(self, agent, world): 141 | # print agent.get_obs.shape 142 | obs = np.array(agent.get_obs()).flatten() 143 | return obs 144 | 145 | def done(self, agent, world): 146 | if agent.itype == "prey": 147 | if agent.can_observe_predator(): 148 | world.resetObj(agent) 149 | return False 150 | #return self.prey_captured -------------------------------------------------------------------------------- /Others/envs/scenarios/pursuit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import 
BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[0,1,0], 19 | [1,0,1], 20 | [0,1,0]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | class Predator(Agent): 27 | def __init__(self): 28 | super(Predator, self).__init__("predator", "blue") 29 | self._obs = deque(maxlen=FLAGS.history_len) 30 | self.obs_range = 1 31 | 32 | def can_observe_prey(self): 33 | shape = np.shape(self._obs) 34 | obs_size = shape[1]*shape[2] 35 | obs = np.reshape(self._obs, obs_size) 36 | ret = np.shape(np.where(obs == 4))[1] > 0 37 | return ret 38 | 39 | def update_obs(self, obs): 40 | self._obs.append(obs[:,:,0]) # use only the first channel 41 | 42 | def fill_obs(self): 43 | # fill the whole history with the current observation 44 | for i in range(FLAGS.history_len-1): 45 | self._obs.append(self._obs[-1]) 46 | 47 | class Scenario(BaseScenario): 48 | def __init__(self): 49 | self.prey_captured = False 50 | 51 | def make_world(self): 52 | world = World(width=map_size, height=map_size) 53 | 54 | agents = [] 55 | self.atype_to_idx = { 56 | "predator": [], 57 | "prey": [] 58 | } 59 | 60 | # add predators 61 | for i in xrange(n_predator): 62 | agents.append(Predator()) 63 | self.atype_to_idx["predator"].append(i) 64 | 65 | # add preys 66 | for i in xrange(n_prey): 67 | agents.append(Prey()) 68 | self.atype_to_idx["prey"].append(n_predator + i) 69 | 70 | world.agents = agents 71 | for i, agent in enumerate(world.agents): 72 | agent.id = i + 1 73 | agent.silent = True 74 | 75 | # make initial conditions 76 | self.reset_world(world) 77 | return world 78 | 79 | def reset_world(self, world): 80 | world.empty_grid() 81 | 82 | # randomly place agent 83 | for agent in world.agents: 84 | world.placeObj(agent) 85 | 86 | world.set_observations() 87 | 88 | # fill the history with current observation 89 | for i in self.atype_to_idx["predator"]: 90 | world.agents[i].fill_obs() 91 | 92 | self.prey_captured = False 93 | 94 | def reward(self, agent, world): 95 | if agent.itype == "predator": 96 | if self.prey_captured: 97 | # return max(10 - world.step_cnt, 0) 98 | return 1 99 | else: 100 | reward = -0.01 101 | for i in self.atype_to_idx["prey"]: 102 | prey = world.agents[i] 103 | if prey.cannot_move(): 104 | # print "captured" 105 | self.prey_captured = True 106 | reward = 1 107 | return reward 108 | # kdw - Use this for large map size 109 | # if agent.can_observe_prey(): 110 | # reward = 0.0 111 | return reward 112 | else: # if prey 113 | if agent.cannot_move(): 114 | return -1 115 | 116 | return 0 117 | 118 | def observation(self, agent, world): 119 | # print agent.get_obs.shape 120 | obs = np.array(agent.get_obs()).flatten() 121 | return obs 122 | 123 | def done(self, agent, world): 124 | return self.prey_captured -------------------------------------------------------------------------------- /Others/envs/scenarios/single_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from envs.scenarios.pursuit import Scenario as BaseScenario 3 | import config 4 | 5 | FLAGS = config.flags.FLAGS 6 | map_size = FLAGS.map_size 7 | 8 | class Scenario(BaseScenario): 9 | def __init__(self): 10 | super(Scenario, self).__init__() 11 
| print "Single agent scenario" 12 | 13 | def reset_world(self, world): 14 | world.empty_grid() 15 | 16 | prey_pos = [0, 0] 17 | 18 | prey_idx = self.atype_to_idx["prey"][0] 19 | world.placeObj(world.agents[prey_idx], top=prey_pos, size=(1,1)) 20 | 21 | top = ((prey_pos[0]+1)%map_size, (prey_pos[1]+1)%map_size) 22 | 23 | world.placeObj(world.agents[0], top=top, size=(2, 2)) 24 | world.placeObj(world.agents[1], top=[0, 1], size=(1, 1)) 25 | 26 | world.set_observations() 27 | 28 | # fill the history with current observation 29 | for i in self.atype_to_idx["predator"]: 30 | world.agents[i].fill_obs() 31 | 32 | self.prey_captured = False 33 | -------------------------------------------------------------------------------- /Others/envs/scenarios/static_prey.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from envs.scenarios.pursuit import Scenario as BaseScenario 3 | import config 4 | 5 | FLAGS = config.flags.FLAGS 6 | map_size = FLAGS.map_size 7 | 8 | class Scenario(BaseScenario): 9 | def __init__(self): 10 | super(Scenario, self).__init__() 11 | 12 | def reset_world(self, world): 13 | world.empty_grid() 14 | 15 | # prey_pos = np.random.choice([map_size - 1, 0], 2) 16 | prey_pos = [0, 0] 17 | prey_idx = self.atype_to_idx["prey"][0] 18 | world.placeObj(world.agents[prey_idx], top=prey_pos, size=(1,1)) 19 | 20 | top = ((prey_pos[0]+1)%map_size, (prey_pos[1]+1)%map_size) 21 | for idx in self.atype_to_idx["predator"]: 22 | world.placeObj(world.agents[idx], top=top, size=(2,2)) 23 | 24 | world.set_observations() 25 | 26 | # fill the history with current observation 27 | for i in self.atype_to_idx["predator"]: 28 | world.agents[i].fill_obs() 29 | 30 | self.prey_captured = False 31 | -------------------------------------------------------------------------------- /Others/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | import logging 4 | import make_env 5 | import agents 6 | import config 7 | import time 8 | import random 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | FLAGS = config.flags.FLAGS 13 | 14 | def set_seed(seed): 15 | """Initialized the random seeds 16 | """ 17 | random.seed(seed) 18 | np.random.seed(seed) 19 | tf.set_random_seed(seed) 20 | return None 21 | 22 | 23 | if __name__ == '__main__': 24 | 25 | set_seed(FLAGS.seed) 26 | 27 | # === Logging setup === # 28 | logger_env = logging.getLogger('SimpleMARL') 29 | logger_agent = logging.getLogger('Agent') 30 | 31 | # === Program start === # 32 | # Load environment 33 | env = make_env.make_env(FLAGS.scenario) 34 | logger_env.info('SimpleMARL Start with %d predator(s) and %d prey(s)', FLAGS.n_predator, FLAGS.n_prey) 35 | 36 | # Load trainer 37 | logger_agent.info('Agent: {}'.format(FLAGS.agent)) 38 | trainer = agents.load(FLAGS.agent+"/trainer.py").Trainer(env) 39 | 40 | # print FLAGS.agent, config.file_name 41 | 42 | # start learning 43 | if FLAGS.train: 44 | start_time = time.time() 45 | trainer.learn() 46 | finish_time = time.time() 47 | # trainer.test() 48 | # print "TRAINING TIME (sec)", finish_time - start_time 49 | else: 50 | trainer.test() 51 | -------------------------------------------------------------------------------- /Others/make_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for creating a multiagent environment with one of the scenarios listed 3 | in ./scenarios/. 
4 | Can be called by using, for example: 5 | env = make_env('simple_speaker_listener') 6 | After producing the env object, can be used similarly to an OpenAI gym 7 | environment. 8 | 9 | A policy using this environment must output actions in the form of a list 10 | for all agents. Each element of the list should be a numpy array, 11 | of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede 12 | communication actions in this array. See environment.py for more details. 13 | """ 14 | 15 | def make_env(scenario_name, benchmark=False): 16 | ''' 17 | Creates a MultiAgentEnv object as env. This can be used similar to a gym 18 | environment by calling env.reset() and env.step(). 19 | Use env.render() to view the environment on the screen. 20 | 21 | Input: 22 | scenario_name : name of the scenario from ./scenarios/ to be Returns 23 | (without the .py extension) 24 | benchmark : whether you want to produce benchmarking data 25 | (usually only done during evaluation) 26 | 27 | Some useful env properties (see environment.py): 28 | .observation_space : Returns the observation space for each agent 29 | .action_space : Returns the action space for each agent 30 | .n : Returns the number of Agents 31 | ''' 32 | # from envs.environment import MultiAgentEnv 33 | import envs.scenarios as scenarios 34 | from envs.environment import MultiAgentSimpleEnv2 as MAS 35 | import config 36 | 37 | env = MAS() 38 | 39 | return env 40 | -------------------------------------------------------------------------------- /Others/readme: -------------------------------------------------------------------------------- 1 | Training 2 | 3 | $algorithm = vdn, qmix, pqmix5(=QTRAN-alt in the paper), pqmix7(=QTRAN in the paper) 4 | 5 | python main.py --agent pos_cac_fo --training_step 10000 --b_size 10000 --m_size 32 --seed 0 --algorithm $algorithm --penalty 0 6 | 7 | 8 | In make_env.py 9 | 10 | (i) Matrix game 11 | 12 | from envs.environment import MultiAgentSimpleEnv2 as MAS 13 | 14 | (i) Gaussian Squeeze 15 | 16 | from envs.environment import MultiAgentSimpleEnv4 as MAS 17 | 18 | 19 | -------------------------------------------------------------------------------- /Others/run_DQN2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | #for seed in 1 2 3 4 5 4 | #do 5 | CUDA_VISIBLE_DEVICES=$1 python main.py --agent pos_cac_fo --training_step 8000 --b_size 10000 --m_size 64 --seed 6 --algorithm $2 --penalty $3 & 6 | #done 7 | 8 | -------------------------------------------------------------------------------- /Others/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os,sys 3 | sys.path.insert(1, os.path.join(sys.path[0], '..')) 4 | import argparse 5 | 6 | from envs.environment import MultiAgentEnv 7 | import envs.scenarios as scenarios 8 | import numpy as np 9 | import config 10 | 11 | FLAGS = config.flags.FLAGS 12 | 13 | 14 | if __name__ == '__main__': 15 | # parse arguments 16 | parser = argparse.ArgumentParser(description=None) 17 | parser.add_argument('-s', '--scenario', default='pursuit.py', help='Path of the scenario Python script.') 18 | args = parser.parse_args() 19 | 20 | # load scenario from script 21 | scenario = scenarios.load(args.scenario).Scenario() 22 | # create world 23 | world = scenario.make_world() 24 | # create multiagent environment 25 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, done_callback=scenario.done) 26 | act_n = 
[2, 2] 27 | print "action space:", env.action_space[0].n 28 | print "observation space:", env.observation_space 29 | 30 | obs_n = env.reset()[:2] 31 | print env.get_agent_profile() 32 | print env.get_full_encoding()[:, :, 2] 33 | imap = np.array(obs_n).reshape((2, FLAGS.history_len,3,3,1)) 34 | 35 | minimap = imap[:,:,:,:,0] 36 | print minimap[0, -1] 37 | print minimap[1, -1] 38 | 39 | while True: 40 | a0 = input("action of agent 0:") 41 | a1 = input("action of agent 1:") 42 | act_n = [a0, a1, 2] 43 | obs_n, reward_n, done_n, info_n = env.step(act_n) 44 | obs_n = obs_n[:2] 45 | 46 | 47 | print env.get_full_encoding()[:,:,2] 48 | imap = np.array(obs_n).reshape((2, FLAGS.history_len,3,3,1)) 49 | 50 | minimap = imap[:,:,:,:,0] 51 | print minimap[0, -1] 52 | print minimap[1, -1] 53 | 54 | 55 | print reward_n, done_n 56 | 57 | -------------------------------------------------------------------------------- /Predator-Prey/agents/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | import logging 4 | 5 | 6 | def load(name): 7 | pathname = osp.join(osp.dirname(__file__), name) 8 | return imp.load_source('', pathname) 9 | 10 | 11 | logger_agent = logging.getLogger('Agent') 12 | logger_agent.setLevel(logging.INFO) 13 | fh_agent = logging.FileHandler('./agent.log') 14 | sh = logging.StreamHandler() 15 | fm = logging.Formatter('[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > [%(name)s] %(message)s') 16 | fh_agent.setFormatter(fm) 17 | sh.setFormatter(fm) 18 | logger_agent.addHandler(fh_agent) 19 | logger_agent.addHandler(sh) 20 | -------------------------------------------------------------------------------- /Predator-Prey/agents/config_agents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | # import agents 4 | 5 | 6 | def config_agent(_flags): 7 | flags = _flags 8 | 9 | flags.DEFINE_string("agent", "cac_fo", "Agent") 10 | 11 | flags.DEFINE_integer("training_step", 500000, "Training time step") 12 | flags.DEFINE_integer("testing_step", 1000, "Testing time step") 13 | flags.DEFINE_integer("max_step", 200, "Maximum time step per episode") 14 | flags.DEFINE_integer("eval_step", 1000, "Number of steps before training") 15 | # flags.DEFINE_integer("training_step", 5000, "Training time step") 16 | # flags.DEFINE_integer("testing_step", 1000, "Testing time step") 17 | # flags.DEFINE_integer("max_step", 200, "Maximum time step per episode") 18 | # flags.DEFINE_integer("eval_step", 1000, "Number of steps before training") 19 | 20 | flags.DEFINE_integer("b_size", 10000, "Size of the replay memory") 21 | flags.DEFINE_integer("m_size", 32, "Minibatch size") 22 | flags.DEFINE_integer("pre_train_step", 300, "during [m_size * pre_step] take random action") 23 | flags.DEFINE_float("lr", 0.00025, "Learning rate") 24 | # flags.DEFINE_float("lr", 0.01, "Learning rate") # it is for single 25 | flags.DEFINE_float("df", 0.99, "Discount factor") 26 | 27 | flags.DEFINE_boolean("load_nn", False, "Load nn from file or not") 28 | flags.DEFINE_string("nn_file", "results/nn/n-2-s-endless3-map-5-penalty-10-a-pqmix5-lr-0.0005-ms-32-seed-28-0103231136-215-3000000", "The name of file for loading") 29 | 30 | flags.DEFINE_boolean("train", True, "Training or testing") 31 | flags.DEFINE_boolean("qtrace", False, "Use q trace") 32 | flags.DEFINE_boolean("kt", False, "Keyboard input test") 33 | flags.DEFINE_boolean("use_action_in_critic", False, "Use guided samples") 
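    # The flags above are read throughout the code via config.flags.FLAGS
    # (e.g. FLAGS.algorithm, FLAGS.lr, FLAGS.m_size) and can be overridden on the
    # command line, as in the run_DQN*.sh scripts. An illustrative invocation:
    #   python main.py --agent pos_cac_fo --algorithm qmix --m_size 32 --seed 0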
34 | flags.DEFINE_string("algorithm", "ddd", 35 | "Which agent to run, as a python path to an Agent class.") 36 | 37 | 38 | 39 | 40 | 41 | def get_filename(): 42 | import config 43 | FLAGS = config.flags.FLAGS 44 | 45 | return "a-"+FLAGS.algorithm+"-lr-"+str(FLAGS.lr)+"-ms-"+str(FLAGS.m_size) 46 | -------------------------------------------------------------------------------- /Predator-Prey/agents/evaluation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | import numpy as np 5 | import logging 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | result = logging.getLogger('Result') 10 | 11 | class Evaluation(object): 12 | 13 | def __init__(self): 14 | self.episode_cnt = 0 15 | self.m = dict() 16 | 17 | def update_value(self, m_key, m_value, m_append=None): 18 | if m_key in self.m: 19 | self.m[m_key]['value'] += m_value 20 | self.m[m_key]['cnt'] += 1 21 | else: 22 | self.m[m_key] = dict() 23 | self.m[m_key]['value'] = m_value 24 | self.m[m_key]['cnt'] = 1 25 | if m_append is None: 26 | result.info(m_key + "\t" + str(m_value)) 27 | else: 28 | result.info(m_key + "\t" + str(m_value) + "\t" + str(m_append)) 29 | 30 | def summarize(self, key=None): 31 | if key is None: 32 | for k in self.m: 33 | print "Average", k, float(self.m[k]['value'])/self.m[k]['cnt'] 34 | result.info("summary\t" + k + "\t" + str(float(self.m[k]['value']) / self.m[k]['cnt'])) 35 | 36 | elif key not in self.m: 37 | print "Wrong key" 38 | 39 | else: 40 | print "Average", key, float(self.m[key]['value']) / self.m[key]['cnt'] 41 | result.info("summary\t" + key + "\t" + str(float(self.m[key]['value'])/self.m[key]['cnt'])) 42 | -------------------------------------------------------------------------------- /Predator-Prey/agents/non_simple_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RandomAgent(object): 4 | def __init__(self, action_dim): 5 | self._action_dim = action_dim 6 | 7 | def act(self, obs): 8 | 9 | if np.random.rand() < 3./8. 
: 10 | return 2 11 | else: 12 | return np.random.randint(self._action_dim) 13 | 14 | # return 2 15 | 16 | def train(self, minibatch, step): 17 | return 18 | 19 | class StaticAgent(object): 20 | def __init__(self, action): 21 | self._action = action 22 | 23 | def act(self, obs): 24 | return self._action 25 | 26 | def train(self, minibatch, step): 27 | return 28 | -------------------------------------------------------------------------------- /Predator-Prey/agents/pos_cac_fo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sonkyunghwan/QTRAN/785c44ebc8379896dc9f513af2ac767d61013914/Predator-Prey/agents/pos_cac_fo/__init__.py -------------------------------------------------------------------------------- /Predator-Prey/agents/pos_cac_fo/agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | """ 5 | =========================================== 6 | :mod:`qlearn` Q-Learning 7 | =========================================== 8 | 9 | ===== 10 | 11 | Choose action based on q-learning algorithm 12 | """ 13 | 14 | import numpy as np 15 | import tensorflow as tf 16 | import math 17 | from agents.pos_cac_fo.dq_network import * 18 | from agents.pos_cac_fo.replay_buffer import * 19 | from agents.evaluation import Evaluation 20 | 21 | import logging 22 | import config 23 | 24 | FLAGS = config.flags.FLAGS 25 | logger = logging.getLogger("Agent") 26 | result = logging.getLogger('Result') 27 | 28 | 29 | class Agent(object): 30 | 31 | def __init__(self, action_dim, obs_dim, name=""): 32 | logger.info("Centralized DQN Agent") 33 | 34 | self._n_predator = FLAGS.n_predator 35 | self._n_prey = FLAGS.n_prey 36 | self.map_size = FLAGS.map_size 37 | 38 | self._obs_dim = obs_dim 39 | 40 | self._action_dim = action_dim * self._n_predator 41 | self._action_dim_single = action_dim 42 | self._n_object = (self._n_predator + self._n_prey) 43 | self._state_dim = 2 * (self._n_predator + self._n_prey) 44 | self._state_dim_single = (self.map_size**2) 45 | 46 | self._name = name 47 | self.update_cnt = 0 48 | self.target_update_period = 10000 49 | 50 | self.df = FLAGS.df 51 | self.lr = FLAGS.lr 52 | 53 | # Make Q-network 54 | tf.reset_default_graph() 55 | my_graph = tf.Graph() 56 | 57 | with my_graph.as_default(): 58 | self.sess = tf.Session(graph=my_graph, config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) 59 | self.q_network = DQNetwork(self.sess, self._state_dim, self._action_dim_single, self._n_predator) 60 | self.sess.run(tf.global_variables_initializer()) 61 | self.saver = tf.train.Saver() 62 | if FLAGS.load_nn: 63 | print "LOAD!" 
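                # Restore a previously saved Q-network checkpoint. The path below is
                # rebuilt from the current flag values (n_predator, map_size, penalty,
                # algorithm, seed, comment), with lr-0.0005 and ms-32 hard-coded in the
                # filename, so it only resolves if a run with matching settings was
                # saved earlier under ./results/nn3/.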
64 | self.saver.restore(self.sess, "./results/nn3/n-"+str(FLAGS.n_predator)+"-s-endless3-map-"+str(FLAGS.map_size)+"-penalty-"+str(FLAGS.penalty)+"-a-"+str(FLAGS.algorithm)+"-lr-0.0005-ms-32-seed-"+str(FLAGS.seed)+"-"+str(FLAGS.comment)) 65 | self.train_writer = tf.summary.FileWriter(config.tb_filename, self.sess.graph) 66 | 67 | self.replay_buffer = ReplayBuffer() 68 | 69 | self._eval = Evaluation() 70 | self.q_prev = None 71 | self.s_array = np.random.randint(self.map_size, size = (2 * (FLAGS.n_prey + FLAGS.n_predator), 100)) 72 | 73 | def act(self, state): 74 | 75 | predator_rand = np.random.permutation(FLAGS.n_predator) 76 | prey_rand = np.random.permutation(FLAGS.n_prey) 77 | 78 | s = self.state_to_index(state) 79 | 80 | action = self.q_network.get_action(s[None])[0] 81 | 82 | return action 83 | 84 | 85 | 86 | def train(self, state, action, reward, state_n, done): 87 | 88 | 89 | predator_rand = np.random.permutation(FLAGS.n_predator) 90 | prey_rand = np.random.permutation(FLAGS.n_prey) 91 | 92 | a = self.action_to_onehot(action) 93 | s = self.state_to_index(state) 94 | s_n = self.state_to_index(state_n) 95 | r = np.sum(reward) 96 | 97 | self.store_sample(s, a, r, s_n, done) 98 | 99 | self.update_network() 100 | return 0 101 | 102 | def store_sample(self, s, a, r, s_n, done): 103 | self.replay_buffer.add_to_memory((s, a, r, s_n, done)) 104 | return 0 105 | 106 | def update_network(self): 107 | self.update_cnt += 1 108 | if len(self.replay_buffer.replay_memory) < FLAGS.pre_train_step*minibatch_size: 109 | return 0 110 | 111 | minibatch = self.replay_buffer.sample_from_memory() 112 | self.q_network.training_qnet(minibatch) 113 | 114 | 115 | if self.update_cnt % self.target_update_period == 0: 116 | self.q_network.training_target_qnet() 117 | 118 | if self.update_cnt % 10000 == 0: 119 | self.saver.save(self.sess, config.nn_filename, self.update_cnt) 120 | 121 | return 0 122 | 123 | def state_to_index(self, state): 124 | """ 125 | For the single agent case, the state is only related to the position of agent 1 126 | :param state: 127 | :return: 128 | """ 129 | 130 | ret = np.zeros(self._state_dim) 131 | for i in range(FLAGS.n_predator + FLAGS.n_prey): 132 | p = np.argwhere(np.array(state)==i+1)[0] 133 | 134 | ret[2 * i] = (p[0] - FLAGS.map_size /2.) / FLAGS.map_size 135 | ret[2 * i + 1] = (p[1] - FLAGS.map_size /2.) 
/ FLAGS.map_size 136 | 137 | 138 | return ret 139 | 140 | def get_predator_pos(self, state): 141 | """ 142 | return position of agent 1 and 2 143 | :param state: input is state 144 | :return: 145 | """ 146 | state_list = list(np.array(state).ravel()) 147 | return state_list.index(1), state_list.index(2) 148 | 149 | def get_pos_by_id(self, state, id): 150 | state_list = list(np.array(state).ravel()) 151 | return state_list.index(id) 152 | 153 | def onehot(self, index, size): 154 | n_hot = np.zeros(size) 155 | n_hot[index] = 1.0 156 | return n_hot 157 | 158 | def index_to_action(self, index): 159 | action_list = [] 160 | for i in range(FLAGS.n_predator-1): 161 | action_list.append(index%5) 162 | index = index/5 163 | action_list.append(index) 164 | return action_list 165 | 166 | def action_to_index(self, action): 167 | index = 0 168 | for i in range(FLAGS.n_predator): 169 | index += action[i] * 5 ** i 170 | return index 171 | 172 | def action_to_onehot(self, action): 173 | onehot = np.zeros([self._n_predator, self._action_dim_single]) 174 | for i in range(self._n_predator): 175 | onehot[i, action[i]] = 1 176 | return onehot 177 | 178 | def onehot_to_action(self, onehot): 179 | action = np.zeros([self._n_predator]) 180 | for i in range(self._n_predator): 181 | action[i] = int(np.argmax(onehot[i])) 182 | return action 183 | 184 | def q_diff(self): 185 | 186 | # if self.q_prev == None: 187 | # self.q_prev = self.q() 188 | # return 189 | 190 | # q_next = self.q() 191 | 192 | # d = 0.0 193 | # a = 0.0 194 | # for i in range(100): 195 | # d += math.fabs(self.q_prev[i] - q_next[i]) 196 | # a += q_next[i] 197 | # avg = a/100 198 | 199 | # self._eval.update_value("q_avg", avg, self.update_cnt) 200 | # self._eval.update_value("q_diff", d, self.update_cnt) 201 | 202 | # self.q_prev = q_next 203 | 204 | # print self.update_cnt, d, avg 205 | 206 | print self.update_cnt 207 | 208 | def q(self): 209 | q_value = [] 210 | # for i in range(100): 211 | # s = self.s_array[:,i] 212 | # s = (s - FLAGS.map_size /2.) 
/ FLAGS.map_size 213 | # q = self.q_network.get_target_q_values(s[None])[0] 214 | # q_max = np.max(q) 215 | # q_value.append(q_max) 216 | return q_value 217 | 218 | def logging(self, reward, step): 219 | 220 | summary = self.q_network.summary(reward, step) 221 | 222 | self.train_writer.add_summary(summary, step) -------------------------------------------------------------------------------- /Predator-Prey/agents/pos_cac_fo/replay_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | import logging 4 | import config 5 | from collections import deque 6 | import random 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | logger = logging.getLogger("Agent.replay") 11 | result = logging.getLogger('Result') 12 | 13 | 14 | class ReplayBuffer: 15 | def __init__(self): 16 | self.replay_memory_capacity = FLAGS.b_size # capacity of experience replay memory 17 | self.minibatch_size = FLAGS.m_size # size of minibatch from experience replay memory for updates 18 | self.replay_memory = deque(maxlen=self.replay_memory_capacity) 19 | 20 | def add_to_memory(self, experience): 21 | self.replay_memory.append(experience) 22 | 23 | def sample_from_memory(self): 24 | return random.sample(self.replay_memory, self.minibatch_size) 25 | 26 | def erase(self): 27 | self.replay_memory.popleft() 28 | -------------------------------------------------------------------------------- /Predator-Prey/agents/pos_cac_fo/trainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | """ 5 | =========================================== 6 | :mod:`qlearn` Q-Learning 7 | =========================================== 8 | 9 | 10 | 설명 11 | ===== 12 | 13 | Choose action based on q-learning algorithm 14 | """ 15 | 16 | import numpy as np 17 | import tensorflow as tf 18 | from agents.pos_cac_fo.agent import Agent 19 | from agents.simple_agent import RandomAgent as NonLearningAgent 20 | from agents.evaluation import Evaluation 21 | from agents.simple_agent import StaticAgent as StAgent 22 | from agents.simple_agent import ActiveAgent as AcAgent 23 | import logging 24 | import config 25 | from envs.gui import canvas 26 | 27 | FLAGS = config.flags.FLAGS 28 | logger = logging.getLogger("Agent") 29 | result = logging.getLogger('Result') 30 | 31 | training_step = FLAGS.training_step 32 | testing_step = FLAGS.testing_step 33 | 34 | epsilon_dec = 2.0/training_step 35 | epsilon_min = 0.1 36 | 37 | 38 | class Trainer(object): 39 | 40 | def __init__(self, env): 41 | logger.info("Centralized DQN Trainer is created") 42 | 43 | self._env = env 44 | self._eval = Evaluation() 45 | self._n_predator = FLAGS.n_predator 46 | self._n_prey = FLAGS.n_prey 47 | self._agent_profile = self._env.get_agent_profile() 48 | self._agent_precedence = self._env.agent_precedence 49 | 50 | self._agent = Agent(self._agent_profile["predator"]["act_dim"], self._agent_profile["predator"]["obs_dim"][0]) 51 | self._prey_agent = AcAgent(5) 52 | 53 | self.epsilon = 1.0 54 | if FLAGS.load_nn: 55 | self.epsilon = epsilon_min 56 | 57 | if FLAGS.gui: 58 | self.canvas = canvas.Canvas(self._n_predator, self._n_prey, FLAGS.map_size) 59 | self.canvas.setup() 60 | def learn(self): 61 | 62 | step = 0 63 | episode = 0 64 | print_flag = False 65 | count = 1 66 | 67 | while step < training_step: 68 | episode += 1 69 | ep_step = 0 70 | obs = self._env.reset() 71 | state = self._env.get_full_encoding()[:, :, 2] 72 | total_reward = 0 73 | 
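            # Positive and negative reward components are accumulated separately
            # below so the per-episode print and the test summary can report both.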
total_reward_pos = 0 74 | total_reward_neg = 0 75 | self.random_action_generator() 76 | while True: 77 | step += 1 78 | ep_step += 1 79 | action = self.get_action(obs, step, state) 80 | obs_n, reward, done, info = self._env.step(action) 81 | state_n = self._env.get_full_encoding()[:, :, 2] 82 | done_single = sum(done) > 0 83 | 84 | self.train_agents(state, action, reward, state_n, done_single) 85 | obs = obs_n 86 | state = state_n 87 | total_reward += np.sum(reward) 88 | if np.sum(reward) >= 0: 89 | total_reward_pos += np.sum(reward) 90 | else: 91 | total_reward_neg += np.sum(reward) 92 | 93 | if is_episode_done(done, step) or ep_step >= FLAGS.max_step : 94 | # print step, ep_step, total_reward 95 | if print_flag and episode % FLAGS.eval_step == 1: 96 | print "[train_ep %d]" % (episode), "\treward", total_reward_pos, total_reward_neg 97 | break 98 | 99 | if episode % FLAGS.eval_step == 0: 100 | self.test(episode) 101 | 102 | self._eval.summarize() 103 | 104 | def random_action_generator(self): 105 | rand_unit = np.random.uniform(size = (FLAGS.n_predator, 5)) 106 | self.rand = rand_unit / np.sum(rand_unit, axis=1, keepdims=True) 107 | 108 | 109 | def get_action(self, obs, step, state, train=True): 110 | act_n = [] 111 | if train == True: 112 | self.epsilon = max(self.epsilon - epsilon_dec, epsilon_min) 113 | 114 | # Action of predator 115 | 116 | action_list = self._agent.act(state) 117 | for i in range(self._n_predator): 118 | if train and (step < FLAGS.m_size * FLAGS.pre_train_step or np.random.rand() < self.epsilon): 119 | action = np.random.choice(5) 120 | act_n.append(action) 121 | else: 122 | act_n.append(action_list[i]) 123 | 124 | 125 | 126 | # Action of prey 127 | for i in range(FLAGS.n_prey): 128 | act_n.append(self._prey_agent.act(state, i)) 129 | # act_n[1] = 2 130 | 131 | return np.array(act_n, dtype=np.int32) 132 | 133 | def train_agents(self, state, action, reward, state_n, done): 134 | self._agent.train(state, action, reward, state_n, done) 135 | 136 | def test(self, curr_ep=None): 137 | 138 | step = 0 139 | episode = 0 140 | 141 | test_flag = FLAGS.kt 142 | sum_reward = 0 143 | sum_reward_pos = 0 144 | sum_reward_neg = 0 145 | while step < testing_step: 146 | episode += 1 147 | obs = self._env.reset() 148 | state = self._env.get_full_encoding()[:, :, 2] 149 | if test_flag: 150 | print "\nInit\n", state 151 | total_reward = 0 152 | total_reward_pos = 0 153 | total_reward_neg = 0 154 | 155 | ep_step = 0 156 | 157 | while True: 158 | 159 | step += 1 160 | ep_step += 1 161 | 162 | action = self.get_action(obs, step, state, False) 163 | obs_n, reward, done, info = self._env.step(action) 164 | state_n = self._env.get_full_encoding()[:, :, 2] 165 | state_next = state_to_index(state_n) 166 | if FLAGS.gui: 167 | self.canvas.draw(state_next, done, "Score:" + str(total_reward) + ", Step:" + str(ep_step)) 168 | 169 | if test_flag: 170 | aa = raw_input('>') 171 | if aa == 'c': 172 | test_flag = False 173 | print action 174 | print state_n 175 | print reward 176 | 177 | obs = obs_n 178 | state = state_n 179 | r = np.sum(reward) 180 | # if r == 0.1: 181 | # r = r * (-1.) 
* FLAGS.penalty 182 | total_reward += r # * (FLAGS.df ** (ep_step-1)) 183 | if r > 0: 184 | total_reward_pos += r 185 | else: 186 | total_reward_neg -= r 187 | 188 | 189 | if is_episode_done(done, step, "test") or ep_step >= FLAGS.max_step: 190 | 191 | if FLAGS.gui: 192 | self.canvas.draw(state_next, done, "Hello", "Score:" + str(total_reward) + ", Step:" + str(ep_step)) 193 | 194 | break 195 | sum_reward += total_reward 196 | sum_reward_pos += total_reward_pos 197 | sum_reward_neg += total_reward_neg 198 | if FLAGS.scenario =="pursuit": 199 | print "Test result: Average steps to capture: ", curr_ep, float(step)/episode 200 | self._eval.update_value("training result: ", float(step)/episode, curr_ep) 201 | elif FLAGS.scenario =="endless" or FLAGS.scenario =="endless2" or FLAGS.scenario =="endless3": 202 | print "Average reward:", FLAGS.penalty, curr_ep, sum_reward /episode, sum_reward_pos/episode, sum_reward_neg/episode 203 | self._eval.update_value("training result: ", sum_reward/episode, curr_ep) 204 | self._agent.logging(sum_reward/episode, curr_ep * 100) 205 | 206 | 207 | def is_episode_done(done, step, e_type="train"): 208 | 209 | if e_type == "test": 210 | if sum(done) > 0 or step >= FLAGS.testing_step: 211 | return True 212 | else: 213 | return False 214 | 215 | else: 216 | if sum(done) > 0 or step >= FLAGS.training_step: 217 | return True 218 | else: 219 | return False 220 | 221 | def state_to_index(state): 222 | """ 223 | For the single agent case, the state is only related to the position of agent 1 224 | :param state: 225 | :return: 226 | """ 227 | 228 | ret = np.zeros(2 * (FLAGS.n_predator + FLAGS.n_prey)) 229 | for i in range(FLAGS.n_predator + FLAGS.n_prey): 230 | p = np.argwhere(np.array(state)==i+1)[0] 231 | #p = self.get_pos_by_id(state, i+1) 232 | ret[2 * i] = p[0] 233 | ret[2 * i + 1] = p[1] 234 | 235 | 236 | return ret 237 | 238 | 239 | 240 | 241 | -------------------------------------------------------------------------------- /Predator-Prey/agents/replay_buffer.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import random 3 | import config 4 | import numpy as np 5 | 6 | FLAGS = config.flags.FLAGS 7 | 8 | replay_memory_capacity = FLAGS.replay_buffer_capacity # capacity of experience replay memory 9 | minibatch_size = FLAGS.minibatch_size # size of minibatch from experience replay memory for updates 10 | trace_length = FLAGS.rnn_trace_len 11 | 12 | class ReplayBuffer: 13 | def __init__(self): 14 | self.replay_memory = deque(maxlen=replay_memory_capacity) 15 | 16 | def add_to_memory(self, experience): 17 | self.replay_memory.append(experience) 18 | 19 | def sample_from_memory(self): 20 | return random.sample(self.replay_memory, minibatch_size) 21 | 22 | class RNNReplayBuffer: 23 | def __init__(self): 24 | self.replay_memory = deque(maxlen=replay_memory_capacity) 25 | self.paddings = None 26 | 27 | def add_to_memory(self, experience): 28 | self.replay_memory.append(experience) 29 | 30 | if self.paddings == None: 31 | obs = np.zeros(experience[-1][0].shape) 32 | self.paddings = (obs, 0, 0, obs, True) 33 | 34 | def pad_trace(self, trace): 35 | trace.extend([self.paddings]*(trace_length-len(trace))) 36 | return trace 37 | 38 | def sample_from_memory(self): 39 | if len(self.replay_memory) < minibatch_size: 40 | n_points_per_ep = int(np.ceil(minibatch_size * 1./len(self.replay_memory))) 41 | sampled_episodes = self.replay_memory 42 | else: 43 | n_points_per_ep = 1 44 | sampled_episodes = 
random.sample(self.replay_memory, minibatch_size) 45 | 46 | sampledTraces = [] 47 | true_trace_length = np.ones(minibatch_size)*trace_length 48 | 49 | for i in range(n_points_per_ep): 50 | for j, episode in enumerate(sampled_episodes): 51 | if len(episode) < trace_length: 52 | true_trace_length[j] = len(episode) 53 | sampledTraces.append(self.pad_trace(episode)) # use the whole episode 54 | else: 55 | point = np.random.randint(0,len(episode) + 1 - trace_length) 56 | sampledTraces.append(episode[point:point + trace_length]) 57 | 58 | sampledTraces = np.array(sampledTraces[:minibatch_size]) # discard extra samples 59 | sampledTraces = np.reshape(sampledTraces,[minibatch_size*trace_length,-1]) 60 | return sampledTraces, true_trace_length 61 | -------------------------------------------------------------------------------- /Predator-Prey/agents/simple_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import config 3 | FLAGS = config.flags.FLAGS 4 | 5 | class RandomAgent(object): 6 | def __init__(self, action_dim): 7 | self._action_dim = action_dim 8 | 9 | def act(self, obs): 10 | 11 | if np.random.rand() < 3./8. : 12 | return 2 13 | else: 14 | return np.random.randint(self._action_dim) 15 | 16 | # return 2 17 | 18 | def train(self, minibatch, step): 19 | return 20 | 21 | class StaticAgent(object): 22 | def __init__(self, action): 23 | self._action = action 24 | 25 | def act(self, obs): 26 | return self._action 27 | 28 | def train(self, minibatch, step): 29 | return 30 | class ActiveAgent(object): 31 | def __init__(self, action_dim): 32 | self._action_dim = action_dim 33 | self._n_predator = FLAGS.n_predator 34 | self._n_prey = FLAGS.n_prey 35 | self._state_dim = 2 * (self._n_predator + self._n_prey) 36 | 37 | 38 | def act(self, state, num): 39 | state_i = self.state_to_index(state) 40 | # s = np.reshape(state_i, [self._state_dim/2, 2]) 41 | self.map_size = FLAGS.map_size 42 | threshold = self.map_size * 2.0 43 | i = self._n_predator + num 44 | action_i = 2 45 | if np.random.rand() < 1.0 : 46 | return np.random.randint(self._action_dim) 47 | pos_i = np.argwhere(np.array(state)==i+1)[0] 48 | for j in range(FLAGS.n_predator): 49 | pos_j = np.argwhere(np.array(state)==j+1)[0] 50 | if abs(pos_i[0] - pos_j[0]) + abs(pos_i[1] - pos_j[1]) < threshold: 51 | p = np.zeros(5) 52 | threshold = abs(pos_i[0] - pos_j[0]) + abs(pos_i[1] - pos_j[1]) 53 | if (pos_i[0] - pos_j[0]) >= abs((pos_i[1] - pos_j[1])): 54 | p[0] = 1 55 | elif (pos_i[1] - pos_j[1]) >= abs((pos_i[0] - pos_j[0])): 56 | p[1] = 1 57 | elif (pos_i[1] - pos_j[1]) <= -abs((pos_i[0] - pos_j[0])): 58 | p[3] = 1 59 | elif (pos_i[0] - pos_j[0]) <= -abs((pos_i[1] - pos_j[1])): 60 | p[4] = 1 61 | action_i = np.random.choice(self._action_dim, p=p/np.sum(p)) 62 | if threshold == 1: 63 | return 2 64 | return action_i 65 | 66 | def state_to_index(self, state): 67 | """ 68 | For the single agent case, the state is only related to the position of agent 1 69 | :param state: 70 | :return: 71 | """ 72 | # p1, p2 = self.get_predator_pos(state) 73 | ret = np.zeros(self._state_dim) 74 | for i in range(FLAGS.n_predator + FLAGS.n_prey): 75 | p = np.argwhere(np.array(state)==i+1)[0] 76 | #p = self.get_pos_by_id(state, i+1) 77 | ret[2 * i] = p[0] 78 | ret[2 * i + 1] = p[1] 79 | 80 | return ret 81 | -------------------------------------------------------------------------------- /Predator-Prey/config.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # coding=utf8 3 | 4 | import tensorflow as tf 5 | import logging 6 | import time 7 | import envs.config_env as config_env 8 | import agents.config_agents as config_agent 9 | 10 | flags = tf.flags 11 | 12 | flags.DEFINE_integer("seed", 0, "Random seed number") 13 | flags.DEFINE_string("folder", "default", "Result file folder name") 14 | flags.DEFINE_string("comment", "None", 15 | "Additional Comments") 16 | flags.DEFINE_boolean("gui", False, "Activate GUI") 17 | 18 | config_env.config_env(flags) 19 | config_agent.config_agent(flags) 20 | 21 | # Make result file with given filename 22 | now = time.localtime() 23 | s_time = "%02d%02d%02d%02d%02d" % (now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec) 24 | file_name = str(flags.FLAGS.n_predator) + "-" 25 | file_name += config_env.get_filename() + "-" + config_agent.get_filename() 26 | file_name += "-seed-"+str(flags.FLAGS.seed)+"-" + s_time + "-" + flags.FLAGS.comment 27 | result = logging.getLogger('Result') 28 | result.setLevel(logging.INFO) 29 | 30 | if flags.FLAGS.folder == "default": 31 | result_fh = logging.FileHandler("./results/eval/r-" + file_name + ".txt") 32 | nn_filename = "./results/nn/n-" + file_name 33 | tb_filename = "./results/board/tb-" + file_name 34 | else: 35 | result_fh = logging.FileHandler("./results/eval/"+ flags.FLAGS.folder +"/r-" + file_name + ".txt") 36 | nn_filename = "./results/nn/" + flags.FLAGS.folder + "/n-" + file_name 37 | tb_filename = "./results/board/" + flags.FLAGS.folder + "/tb-" + file_name 38 | 39 | result_fm = logging.Formatter('[%(filename)s:%(lineno)s] %(asctime)s\t%(message)s') 40 | result_fh.setFormatter(result_fm) 41 | result.addHandler(result_fh) 42 | 43 | # Used to map colors to integers 44 | COLOR_TO_IDX = { 45 | 'red' : 0, 46 | 'green' : 1, 47 | 'blue' : 2, 48 | 'purple': 3, 49 | 'yellow': 4, 50 | 'grey' : 5 51 | } 52 | 53 | IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys())) 54 | 55 | # Map of object type to integers 56 | OBJECT_TO_IDX = { 57 | 'empty' : 0, 58 | 'wall' : 1, 59 | 'agent' : 2, 60 | 'predator' : 3, 61 | 'prey' : 4, 62 | 'prey2' : 5 63 | } 64 | 65 | IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys())) -------------------------------------------------------------------------------- /Predator-Prey/envs/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | # from gym.envs.registration import register 3 | 4 | # Multiagent envs 5 | # ---------------------------------------- 6 | 7 | # register( 8 | # id='MultiagentSimple-v0', 9 | # entry_point='multiagent.envs:SimpleEnv', 10 | # # FIXME(cathywu) currently has to be exactly max_path_length parameters in 11 | # # rllab run script 12 | # max_episode_steps=100, 13 | # ) 14 | 15 | # register( 16 | # id='MultiagentSimpleSpeakerListener-v0', 17 | # entry_point='multiagent.envs:SimpleSpeakerListenerEnv', 18 | # max_episode_steps=100, 19 | # ) 20 | 21 | logger_agent = logging.getLogger('GridMARL') 22 | logger_agent.setLevel(logging.INFO) 23 | 24 | fm = logging.Formatter('[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > [%(name)s] %(message)s') 25 | sh = logging.StreamHandler() 26 | sh.setFormatter(fm) 27 | logger_agent.addHandler(sh) 28 | 29 | # fh_agent = logging.FileHandler('./agent.log') 30 | # fh_agent.setFormatter(fm) 31 | # logger_agent.addHandler(fh_agent) 32 | -------------------------------------------------------------------------------- /Predator-Prey/envs/config_env.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | 5 | 6 | def config_env(_flags): 7 | flags = _flags 8 | 9 | 10 | # Scenario 11 | flags.DEFINE_string("scenario", "pursuit", "Scenario") 12 | flags.DEFINE_integer("n_predator", 2, "Number of predators") 13 | flags.DEFINE_integer("n_prey1", 1, "Number of preys 1") 14 | flags.DEFINE_integer("n_prey2", 1, "Number of preys 2") 15 | flags.DEFINE_integer("n_prey", 2, "Number of preys") 16 | # Observation 17 | flags.DEFINE_integer("history_len", 1, "How many previous steps we look back") 18 | 19 | # core 20 | flags.DEFINE_integer("map_size", 3, "Size of the map") 21 | flags.DEFINE_float("render_every", 1000, "Render the nth episode") 22 | 23 | # Penalty 24 | flags.DEFINE_integer("penalty", 1, "reward penalty") 25 | 26 | def get_filename(): 27 | import config 28 | FLAGS = config.flags.FLAGS 29 | 30 | return "s-"+FLAGS.scenario+"-map-"+str(FLAGS.map_size)+"-penalty-"+str(FLAGS.penalty) -------------------------------------------------------------------------------- /Predator-Prey/envs/environment.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | import numpy as np 4 | 5 | # environment for all agents in the multiagent world 6 | # currently code assumes that no agents will be created/destroyed at runtime! 7 | 8 | 9 | class MultiAgentEnv(gym.Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'] 12 | } 13 | 14 | def __init__(self, world, reset_callback=None, reward_callback=None, 15 | observation_callback=None, info_callback=None, 16 | done_callback=None, shared_viewer=True): 17 | 18 | self.world = world 19 | self.agents = self.world.agents 20 | # set required vectorized gym env property 21 | self.n = len(world.agents) 22 | # scenario callbacks 23 | self.reset_callback = reset_callback 24 | self.reward_callback = reward_callback 25 | self.observation_callback = observation_callback 26 | self.info_callback = info_callback 27 | self.done_callback = done_callback 28 | 29 | # environment parameters 30 | self.discrete_comm_space = True 31 | self.time = 0 32 | 33 | # configure spaces 34 | self.action_space = [] 35 | self.observation_space = [] 36 | self.agent_precedence = [] 37 | for agent in self.agents: 38 | self.agent_precedence.append(agent.itype) 39 | total_action_space = [] 40 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 41 | total_action_space.append(u_action_space) 42 | 43 | # communication action space 44 | if self.discrete_comm_space: 45 | c_action_space = spaces.Discrete(world.dim_c) 46 | else: 47 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,)) 48 | 49 | if not agent.silent: 50 | total_action_space.append(c_action_space) 51 | 52 | # total action space 53 | if len(total_action_space) > 1: 54 | # all action spaces are discrete, so simplify to MultiDiscrete action space 55 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 56 | act_space = spaces.MultiDiscrete([act_space.n for act_space in total_action_space]) 57 | else: 58 | act_space = spaces.Tuple(total_action_space) 59 | self.action_space.append(act_space) 60 | else: 61 | self.action_space.append(total_action_space[0]) 62 | 63 | # observation space 64 | obs_dim = len(observation_callback(agent, self.world).flatten()) 65 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,))) 66 | agent.action.c = 
np.zeros(self.world.dim_c) 67 | 68 | def get_agent_profile(self): 69 | agent_profile = {} 70 | 71 | for i, agent in enumerate(self.agents): 72 | if agent.itype in agent_profile: 73 | agent_profile[agent.itype]['n_agent'] += 1 74 | agent_profile[agent.itype]['idx'].append(i) 75 | else: 76 | if isinstance(self.action_space[i], spaces.Discrete): 77 | act_space = self.action_space[i].n 78 | com_space = 0 79 | else: 80 | act_space = self.action_space[i].nvec[0] 81 | com_space = self.action_space[i].nvec[1] 82 | 83 | agent_profile[agent.itype] = { 84 | 'n_agent': 1, 85 | 'idx': [i], 86 | 'act_dim': act_space, 87 | 'com_dim': com_space, 88 | 'obs_dim': self.observation_space[i].shape 89 | } 90 | 91 | return agent_profile 92 | 93 | def step(self, action_n): 94 | obs_n = [] 95 | reward_n = [] 96 | done_n = [] 97 | reset_n = [] 98 | info_n = {'n': []} 99 | 100 | self.agents = self.world.agents 101 | self.world.step(action_n) 102 | 103 | for agent in self.agents: 104 | reward_n.append(self._get_reward(agent)) 105 | 106 | for agent in self.agents: 107 | info_n['n'].append(self._get_info(agent)) 108 | 109 | for agent in self.agents: 110 | reset_n.append(self._get_done(agent)) 111 | # done_n.append(self._get_done(agent)) 112 | 113 | i = 0 114 | for agent in self.agents: 115 | if reset_n[i] == True: 116 | self.world.resetObj(agent) 117 | i += 1 118 | for agent in self.agents: 119 | done_n.append(False) 120 | 121 | for agent in self.agents: 122 | obs_n.append(self._get_obs(agent)) 123 | 124 | return obs_n, reward_n, done_n, info_n 125 | 126 | def reset(self): 127 | # reset world 128 | self.reset_callback(self.world) 129 | 130 | obs_n = [] 131 | for agent in self.agents: 132 | obs_n.append(self._get_obs(agent)) 133 | return obs_n 134 | 135 | # get info used for benchmarking 136 | def _get_info(self, agent): 137 | if self.info_callback is None: 138 | return {} 139 | return self.info_callback(agent, self.world) 140 | 141 | # get observation for a particular agent 142 | def _get_obs(self, agent): 143 | if self.observation_callback is None: 144 | return np.zeros(0) 145 | return self.observation_callback(agent, self.world) 146 | 147 | # get dones for a particular agent 148 | # unused right now -- agents are allowed to go beyond the viewing screen 149 | def _get_done(self, agent): 150 | if self.done_callback is None: 151 | return False 152 | return self.done_callback(agent, self.world) 153 | 154 | def _get_done(self, agent): 155 | if self.done_callback is None: 156 | return False 157 | return self.done_callback(agent, self.world) 158 | 159 | # get reward for a particular agent 160 | def _get_reward(self, agent): 161 | if self.reward_callback is None: 162 | return 0.0 163 | return self.reward_callback(agent, self.world) 164 | 165 | def get_full_encoding(self): 166 | return self.world.get_full_encoding() -------------------------------------------------------------------------------- /Predator-Prey/envs/grid_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import config 3 | 4 | 5 | COLOR_TO_IDX = config.COLOR_TO_IDX 6 | OBJECT_TO_IDX = config.OBJECT_TO_IDX 7 | 8 | N = 0 9 | E = 1 10 | O = 2 11 | W = 3 12 | S = 4 13 | 14 | # action of the agent 15 | class Action(object): 16 | def __init__(self): 17 | # physical action 18 | self.u = None 19 | # communication action 20 | self.c = None 21 | 22 | # properties and state of physical world entity 23 | class Entity(object): 24 | def __init__(self, itype, color): 25 | assert itype in OBJECT_TO_IDX, itype 26 
| assert color in COLOR_TO_IDX, color 27 | self.itype = itype 28 | self.color = color 29 | self.contains = None 30 | 31 | # name 32 | self.name = '' 33 | # properties: 34 | self.movable = False 35 | # entity collides with others 36 | self.collide = True 37 | # material density (affects mass) 38 | self.density = 25.0 39 | 40 | @property 41 | def pos(self): 42 | return self._x, self._y 43 | 44 | def set_pos(self, x, y): 45 | self._x = x 46 | self._y = y 47 | 48 | # properties of agent entities 49 | class CoreAgent(Entity): 50 | def __init__(self, itype='agent', color='green'): 51 | super(CoreAgent, self).__init__(itype, color) 52 | self.name = "" 53 | # agents are movable by default 54 | self.movable = True 55 | # cannot send communication signals 56 | self.silent = True 57 | # action 58 | self.action = Action() 59 | # if waiting for other agents action 60 | self.waiting = False 61 | # if done doing its action in the current step 62 | self.done_moving = False 63 | # if the intended step collided 64 | self.collided = False 65 | 66 | self._obs = None 67 | self._x = 0 68 | self._y = 0 69 | self.obs_range = 1 70 | 71 | def update_obs(self, obs): 72 | self._obs = obs 73 | 74 | def get_obs(self): 75 | return self._obs 76 | 77 | class Wall(Entity): 78 | def __init__(self, color='grey'): 79 | super(Wall, self).__init__('wall', color) 80 | 81 | class Grid(object): 82 | """ 83 | Represent a grid and operations on it 84 | """ 85 | 86 | def __init__(self, width, height): 87 | assert width >= 2 88 | assert height >= 2 89 | 90 | self.width = width 91 | self.height = height 92 | self.reset() 93 | 94 | def set(self, i, j, v): 95 | assert i >= 0 and i < self.width 96 | assert j >= 0 and j < self.height 97 | self.grid[j * self.width + i] = v 98 | 99 | def get(self, i, j): 100 | if ((i >= 0 and i < self.width) and \ 101 | (j >= 0 and j < self.height)): 102 | return self.grid[j * self.width + i] 103 | 104 | return Wall() 105 | 106 | def reset(self): 107 | self.grid = [None] * self.width * self.height 108 | 109 | def setHorzWall(self, x, y, length=None): 110 | if length is None: 111 | length = self.width - x 112 | for i in range(0, length): 113 | self.set(x + i, y, Wall()) 114 | 115 | def setVertWall(self, x, y, length=None): 116 | if length is None: 117 | length = self.height - y 118 | for j in range(0, length): 119 | self.set(x, y + j, Wall()) 120 | 121 | def wallRect(self, x, y, w, h): 122 | self.setHorzWall(x, y, w) 123 | self.setHorzWall(x, y+h-1, w) 124 | self.setVertWall(x, y, h) 125 | self.setVertWall(x+w-1, y, h) 126 | 127 | def slice(self, topX, topY, width, height): 128 | """ 129 | Get a subset of the grid 130 | """ 131 | 132 | grid = Grid(width, height) 133 | 134 | for j in range(0, height): 135 | for i in range(0, width): 136 | x = topX + i 137 | y = topY + j 138 | 139 | if x >= 0 and x < self.width and \ 140 | y >= 0 and y < self.height: 141 | v = self.get(x, y) 142 | else: 143 | v = Wall() 144 | 145 | grid.set(i, j, v) 146 | 147 | return grid 148 | 149 | def encode(self): 150 | """ 151 | Produce a compact numpy encoding of the grid 152 | """ 153 | 154 | array = np.zeros(shape=(self.height, self.width, 3), dtype=np.int8) 155 | 156 | for j in range(0, self.height): 157 | for i in range(0, self.width): 158 | 159 | v = self.get(i, j) 160 | if isinstance(v, CoreAgent): 161 | array[j, i, 2] = v.id 162 | 163 | if v == None: 164 | continue 165 | 166 | 167 | array[j, i, 0] = OBJECT_TO_IDX[v.itype] 168 | array[j, i, 1] = COLOR_TO_IDX[v.color] 169 | 170 | return array 171 | 172 | # multi-agent world 173 | 
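# Summary of the World mechanics implemented in the methods below:
#  * step(action_n) assigns one discrete action per agent (N, E, O, W, S, where
#    O leaves the agent in place) and then moves the agents one at a time via
#    single_agent_step().
#  * If an agent's target cell holds another agent, it waits, lets that agent
#    take its own move first, and re-checks; if the cell is still occupied the
#    move is dropped and the agent's `collided` flag is set.
#  * After all moves, set_observations() gives every agent the encoded
#    (2*obs_range+1) x (2*obs_range+1) slice of the grid centred on its position.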
class World(object): 174 | def __init__(self, width, height): 175 | # list of agents and entities (can change at execution-time!) 176 | self.agents = [] 177 | 178 | # communication channel dimensionality 179 | self.dim_c = 0 180 | # position dimensionality 181 | self.dim_p = 2 182 | 183 | self.width = width 184 | self.height = height 185 | 186 | self.grid = Grid(self.width, self.height) 187 | self.grid.wallRect(0, 0, self.width, self.height) 188 | 189 | self.step_cnt = 0 190 | 191 | def empty_grid(self): 192 | self.step_cnt = 0 193 | self.grid.reset() 194 | 195 | def placeObj(self, obj, top=None, size=None, reject_fn=None): 196 | """ 197 | Place an object at an empty position in the grid 198 | 199 | :param top: top-left position of the rectangle where to place 200 | :param size: size of the rectangle where to randomly place 201 | :param reject_fn: function to filter out potential positions 202 | """ 203 | 204 | if top is None: 205 | top = (0, 0) 206 | 207 | if size is None: 208 | size = (self.grid.width, self.grid.height) 209 | 210 | while True: 211 | pos = ( 212 | np.random.randint(top[0], top[0] + size[0]), 213 | np.random.randint(top[1], top[1] + size[1]) 214 | ) 215 | 216 | # Don't place the object on top of another object 217 | if self.grid.get(*pos) != None: 218 | continue 219 | 220 | # Check if there is a filtering criterion 221 | if reject_fn and reject_fn(self, pos): 222 | continue 223 | 224 | break 225 | 226 | self.grid.set(pos[0], pos[1], obj) 227 | obj.set_pos(pos[0], pos[1]) 228 | return pos 229 | 230 | def resetObj(self, obj, top=None, size=None, reject_fn=None): 231 | """ 232 | Reset an object at an empty position in the grid 233 | 234 | :param top: top-left position of the rectangle where to place 235 | :param size: size of the rectangle where to randomly place 236 | :param reject_fn: function to filter out potential positions 237 | """ 238 | if top is None: 239 | top = (0, 0) 240 | 241 | if size is None: 242 | size = (self.grid.width, self.grid.height) 243 | 244 | while True: 245 | pos = ( 246 | np.random.randint(top[0], top[0] + size[0]), 247 | np.random.randint(top[1], top[1] + size[1]) 248 | ) 249 | 250 | # Don't place the object on top of another object 251 | if self.grid.get(*pos) != None: 252 | continue 253 | 254 | # Check if there is a filtering criterion 255 | if reject_fn and reject_fn(self, pos): 256 | continue 257 | 258 | break 259 | x, y = obj.pos 260 | self.grid.set(x, y, None) 261 | self.grid.set(pos[0], pos[1], obj) 262 | obj.set_pos(pos[0], pos[1]) 263 | return pos 264 | 265 | def single_agent_step(self, agent, action): 266 | if agent.done_moving or agent.waiting: 267 | return 268 | 269 | x, y = agent.pos 270 | action = agent.action.u 271 | 272 | if action == N: 273 | y -= 1 274 | elif action == E: 275 | x -= 1 276 | elif action == W: 277 | x += 1 278 | elif action == S: 279 | y += 1 280 | elif action == O: 281 | agent.done_moving = True 282 | agent.collided = False 283 | return 284 | 285 | intended_cell = self.grid.get(x, y) 286 | if isinstance(intended_cell, CoreAgent): 287 | agent.waiting = True 288 | # let the other agent move first 289 | self.single_agent_step(intended_cell, intended_cell.action.u) 290 | agent.waiting = False 291 | # get the intended cell (to check if it is empty) 292 | intended_cell = self.grid.get(x, y) 293 | 294 | # check if the intended cell is empty 295 | if not intended_cell is None: 296 | agent.collided = True 297 | else: 298 | x_0, y_0 = agent.pos 299 | self.grid.set(x_0, y_0, None) 300 | self.grid.set(x, y, agent) 301 | 
agent.set_pos(x, y) 302 | agent.collided = False 303 | 304 | agent.done_moving = True 305 | 306 | # update state of the world 307 | def step(self, action_n): 308 | self.step_cnt += 1 309 | # set the action 310 | for i, agent in enumerate(self.agents): 311 | agent.action.u = action_n[i] 312 | agent.done_moving = False 313 | 314 | # do the action 315 | for agent in self.agents: 316 | self.single_agent_step(agent, agent.action.u) 317 | 318 | # update observations of all agents 319 | self.set_observations() 320 | 321 | def set_observations(self): 322 | for agent in self.agents: 323 | x, y = agent.pos 324 | r = agent.obs_range 325 | obs = self.grid.slice(x-r, y-r,r*2+1,r*2+1) 326 | agent.update_obs(obs.encode()) 327 | 328 | def get_full_encoding(self): 329 | return self.grid.encode() -------------------------------------------------------------------------------- /Predator-Prey/envs/gui/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sonkyunghwan/QTRAN/785c44ebc8379896dc9f513af2ac767d61013914/Predator-Prey/envs/gui/__init__.py -------------------------------------------------------------------------------- /Predator-Prey/envs/gui/canvas.py: -------------------------------------------------------------------------------- 1 | 2 | # --------------------------------------------------------------- 3 | # Display on GUI the positions of predator agents and prey agents 4 | # --------------------------------------------------------------- 5 | 6 | import random 7 | import socket 8 | import threading 9 | import json 10 | import pygame 11 | import ConfigParser 12 | 13 | from time import sleep 14 | from math import pi, sin, cos, sqrt, ceil, floor 15 | from envs.gui.guiObjects import guiPred, guiPrey # no need for camera view 16 | 17 | GREY = (25, 25, 25, 128) 18 | WHITE = (255, 255, 255, 0) 19 | ORANGE = (255, 100, 0, 128) 20 | RED = (255, 0, 0) 21 | GREEN = (0, 153, 76, 128) 22 | 23 | edge_len_pix = 960 # This is the length of each edge in pixels 24 | # Note that we only consider square maps, which will be drawn on square PyGame surfaces 25 | 26 | # # Toy parameters (not yet discretized into the grid-world setting) 27 | # positions = [12, 245, 1003, 298, 933, 393, 1100, 28, 222, 353] 28 | # schedule = [0, 1, 0] 29 | 30 | # Toy parameters (now discretized into the grid-world setting) 31 | positions = [0, 1, 3, 4, 4, 1, 2, 4, 3, 1] 32 | schedule = [0, 1, 0] 33 | 34 | class Canvas(): 35 | def __init__(self, num_pred = 3, num_prey = 2, map_size = 5): 36 | # Take resolution and number of trackers as argument 37 | 38 | self.num_pred = num_pred 39 | self.num_prey = num_prey 40 | self.map_size = map_size 41 | 42 | # Define grid locator parameter 43 | # Since we're dealing with a square grid-world, 44 | # one locator is enough for both the x- and y-coordinates 45 | 46 | self.locator = int(edge_len_pix/self.map_size) 47 | 48 | # --- Some PyGame-related initialization --- 49 | pygame.init() 50 | self.clock = pygame.time.Clock() 51 | self.display_surface = pygame.display.set_mode((edge_len_pix, edge_len_pix)) 52 | pygame.display.set_caption("Predator Prey Simulator") 53 | self.movable_surface = pygame.Surface((edge_len_pix, edge_len_pix)) 54 | self.message_surface = pygame.Surface((edge_len_pix, 32), pygame.SRCALPHA) 55 | self.message_surface = self.message_surface.convert_alpha() 56 | self.done_surface = pygame.Surface((edge_len_pix, edge_len_pix), pygame.SRCALPHA) 57 | self.done_surface = self.done_surface.convert_alpha() 58 | self.mx = 
self.movable_surface.get_width() 59 | self.my = self.movable_surface.get_height() 60 | self.done = False 61 | 62 | # --- Diplay screen resolution --- 63 | # For displaying the message from the learning module 64 | self.fs = 32 65 | self.font = pygame.font.SysFont(pygame.font.get_default_font(), self.fs) 66 | 67 | # Frame is fixed 68 | self.framex = edge_len_pix 69 | self.framey = edge_len_pix 70 | 71 | # Movable surface is variable 72 | self.wx = self.mx 73 | self.wy = self.my 74 | self.zoom_sensitivity = 1.02 # Change this to zoom faster 75 | self.pan_sensitivity = 5 # Change this to move screen faster 76 | self.sx = 0 77 | self.sy = 0 78 | 79 | # --- Testing for scroll --- 80 | self.tx = 0 81 | self.ty = 0 82 | 83 | self.center_mark_size_px = 10 84 | self.center_mark_thickness_px = 1 85 | self.button_size_px = 50 86 | 87 | self.guiObjectsList = [] 88 | 89 | # Some viewing margin for the button spacing 90 | self.vmargin = 5 91 | 92 | # Correctors for intuitive viewing 93 | self.angle_corrector = 90 94 | self.x_corrector = self.mx/2 95 | self.y_corrector = self.my/2 96 | self.cam_view_scaler = 2 97 | 98 | self.button_value = 2 99 | 100 | def setup(self): 101 | 102 | # --- guiObjects setup --- 103 | # Randomly positioned for now... get real values later 104 | self.target_cnt = self.num_prey # Allow only one target 105 | self.target_size_px = 20 # The size of the target in pixels 106 | 107 | self.btn_pause_surface = pygame.Surface((self.button_size_px, self.button_size_px), pygame.SRCALPHA) 108 | self.btn_pause_surface = self.btn_pause_surface.convert_alpha() 109 | self.btn_pause_surface.fill(WHITE) 110 | 111 | self.btn_play_surface = pygame.Surface((self.button_size_px, self.button_size_px), pygame.SRCALPHA) 112 | self.btn_play_surface = self.btn_play_surface.convert_alpha() 113 | self.btn_play_surface.fill(WHITE) 114 | 115 | self.btn_ff_surface = pygame.Surface((self.button_size_px, self.button_size_px), pygame.SRCALPHA) 116 | self.btn_ff_surface = self.btn_ff_surface.convert_alpha() 117 | self.btn_ff_surface.fill(WHITE) 118 | 119 | self.button_press_reactor = {"pause":0, "play":0, "ff":0} 120 | 121 | 122 | 123 | # Append the predators first and then the preys 124 | for i in range(self.num_pred): 125 | self.pred = guiPred(pred_id = i) 126 | self.pred.setup() 127 | self.guiObjectsList.append(self.pred) 128 | 129 | for j in range(self.num_prey): 130 | self.prey = guiPrey(prey_id = j) 131 | self.prey.setup() 132 | self.guiObjectsList.append(self.prey) 133 | # guiObjectsList looks like this [pred0, pred1, ..., pred(num_pred-1), prey0, prey1, ..., prey(num_prey-1)] 134 | 135 | 136 | def button(self, text, bx, by, bw, bh, ac, ic, surface): 137 | mouse = pygame.mouse.get_pos() 138 | click = pygame.mouse.get_pressed() 139 | if bx + bw > mouse[0] > bx and by + bh > mouse[1] > by: 140 | pygame.draw.rect(surface, ac, (bx, by, self.button_size_px, self.button_size_px)) 141 | if click[0] == 1: 142 | pygame.draw.rect(surface, (255, 255, 0, 128), (bx, by, self.button_size_px, self.button_size_px)) 143 | else: 144 | pygame.draw.rect(surface, ic, (bx, by, self.button_size_px, self.button_size_px)) 145 | 146 | button_font = pygame.font.SysFont(pygame.font.get_default_font(), 20) 147 | button_label = button_font.render(text, True, (0, 0, 0)) 148 | surface.blit(button_label, (self.button_size_px/2 - button_font.size(text)[0]/2, self.button_size_px/2 - button_font.size(text)[1]/2)) 149 | self.display_surface.blit(surface, (bx, by)) 150 | 151 | def make_border(self, obj): 152 | 153 | 
pygame.draw.rect(obj.surface, obj.border_color, [0, 0, obj.sy, obj.border_thickness]) 154 | pygame.draw.rect(obj.surface, obj.border_color, [0, obj.sy - obj.border_thickness, obj.sy, obj.border_thickness]) 155 | pygame.draw.rect(obj.surface, obj.border_color, [0, 0, obj.border_thickness, obj.sy]) 156 | pygame.draw.rect(obj.surface, obj.border_color, [obj.sx - obj.border_thickness, 0, obj.border_thickness, obj.sy]) 157 | 158 | def draw(self, positions, schedule, msg=None, done=False): 159 | # positions is a list of x, y describing the x, y coordinates of each agent 160 | # schedule is a list whose elements are either 0 or 1, signifying that the agent is scheduled (1) or not (0) 161 | # while not self.done: 162 | mouse_pos = pygame.mouse.get_pos() 163 | for event in pygame.event.get(): 164 | if ((event.type == pygame.QUIT) or ((event.type == pygame.KEYDOWN) and (event.key == pygame.K_q))): 165 | self.done = True 166 | if event.type == pygame.MOUSEBUTTONDOWN: 167 | 168 | # --- Buttons --- 169 | if self.framey - self.button_size_px - self.vmargin < mouse_pos[1] < self.framey - self.vmargin: 170 | # Pause button 171 | if self.vmargin < mouse_pos[0] < self.vmargin + self.button_size_px: 172 | if event.type == pygame.MOUSEBUTTONDOWN: 173 | self.button_press_reactor["pause"] = min(255, self.button_press_reactor["pause"] + 200) 174 | # sent = self.conn.send("pause") # TODO 175 | self.button_value = 0 176 | 177 | 178 | # Play button 179 | if 2*self.vmargin + self.button_size_px < mouse_pos[0] < 2*self.vmargin + 2*self.button_size_px: 180 | if event.type == pygame.MOUSEBUTTONDOWN: 181 | self.button_press_reactor["play"] = min(255, self.button_press_reactor["play"] + 200) 182 | # sent = self.conn.send("play") # TODO 183 | self.button_value = 1 184 | 185 | 186 | # Fast-forward button 187 | if 3*self.vmargin + 2*self.button_size_px < mouse_pos[0] < 3*self.vmargin + 3*self.button_size_px: 188 | if event.type == pygame.MOUSEBUTTONDOWN: 189 | self.button_press_reactor["ff"] = min(255, self.button_press_reactor["ff"] + 200) 190 | # sent = self.conn.send("ff") # TODO 191 | self.button_value = 2 192 | 193 | if event.type == pygame.MOUSEBUTTONDOWN: 194 | if event.button == 4: 195 | self.wx *= self.zoom_sensitivity 196 | self.wy *= self.zoom_sensitivity 197 | if event.button == 5: 198 | self.wx /= self.zoom_sensitivity 199 | self.wy /= self.zoom_sensitivity 200 | 201 | pressed = pygame.key.get_pressed() 202 | if pressed[pygame.K_w]: self.sy += self.pan_sensitivity 203 | if pressed[pygame.K_s]: self.sy -= self.pan_sensitivity 204 | if pressed[pygame.K_a]: self.sx += self.pan_sensitivity 205 | if pressed[pygame.K_d]: self.sx -= self.pan_sensitivity 206 | 207 | # --- Fill background --- 208 | self.display_surface.fill(GREY) 209 | self.movable_surface.fill((255, 255, 255, 0)) 210 | self.message_surface.fill(GREY) 211 | self.done_surface.fill((255, 255, 0, 128)) 212 | self.btn_pause_surface.fill((0+self.button_press_reactor["pause"], 153, 76, 128)) 213 | self.btn_play_surface.fill((0+self.button_press_reactor["play"], 153, 76, 128)) 214 | self.btn_ff_surface.fill((0+self.button_press_reactor["ff"], 153, 76, 128)) 215 | 216 | for button in self.button_press_reactor: 217 | self.button_press_reactor[button] = max(0, self.button_press_reactor[button]-1) 218 | 219 | 220 | 221 | # --- Position update ---------------------------------------------- 222 | # Call some get_pos() function here by asking the Environment 223 | # Then, update the guiObjects' positions accordingly 224 | 225 | # RECV_UPDATE() function runs on its 
own thread now. 226 | # This is to accept asynchronous inputs from 227 | # (i) remote server and (ii) local keyboard input for zooming/panning. 228 | 229 | for obj in self.guiObjectsList: 230 | # Fill the surface of target and drone objects 231 | if (("predator" in obj.name) or (obj.name == "prey")): 232 | obj.surface.fill(WHITE) 233 | 234 | # --- guiObject update --- 235 | 236 | # Re-draw target circle 237 | cnt = 0 238 | for obj in self.guiObjectsList: 239 | if obj.name == "prey": 240 | obj.surface = pygame.transform.scale(obj.surface, (int(2*obj.z), int(2*obj.z))) 241 | if schedule[cnt] == True: 242 | pygame.draw.circle(obj.surface, obj.eye_color, (int(obj.z), int(obj.z)), int(obj.z), 0) 243 | else: 244 | 245 | pygame.draw.circle(obj.surface, obj.color, (int(obj.z), int(obj.z)), int(obj.z), 0) 246 | # if schedule[cnt] == True: 247 | # pygame.draw.circle(obj.surface, obj.eye_color, (int(obj.z), int(obj.z)), int(obj.z), 3) 248 | cnt += 1 249 | 250 | # Re-draw drone objects 251 | cnt = 0 252 | for obj in self.guiObjectsList: 253 | if "predator" in obj.name: 254 | # Re-scale each surface so that each guiObject can fit in it 255 | obj.surface = pygame.transform.scale(obj.surface, (int(2*obj.z), int(2*obj.z))) 256 | 257 | # Re-draw objects according to z-coordinate (their size will vary) 258 | pygame.draw.circle(obj.surface, obj.body_color, (int(obj.z), int(obj.z)), int(obj.z), 0) 259 | 260 | # if schedule[cnt] == 1: 261 | # pygame.draw.circle(obj.surface, obj.eye_color, (int(obj.z), int(obj.z)), int(obj.z), 3) 262 | cnt += 1 263 | # --- Canvas update --- 264 | # Re-drawing is called "blitting"! 265 | 266 | # Blit hierarchy follows this order: 267 | # [BOTTOM LEVEL] <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< [TOP LEVEL] 268 | # guiObject.label <<< guiObject.surface <<< movable_surface <<< display_surface 269 | 270 | # guiObject.surface.blit(guiObject.label, [position]) : write label on object's surface 271 | # movable_surface.blit(guiObject.surface, [position]) : draw object's surface onto movable surface 272 | # display_surface.blit(movable_surface, [position]) : draw movable surface onto a position-fixed display surface 273 | 274 | # Bottom-level blit 275 | for guiObject in self.guiObjectsList: 276 | # Target 277 | if guiObject.name == "prey": 278 | guiObject.surface.blit(guiObject.label, (int(guiObject.sx/2 - guiObject.font.size(guiObject.text)[0]/2), int(guiObject.sy/2 - guiObject.font.size(guiObject.text)[1]/2))) 279 | 280 | # Drones 281 | elif "predator" in guiObject.name: 282 | guiObject.surface.blit(guiObject.label, (int(guiObject.z - guiObject.font.size(guiObject.text)[0]/2), int(guiObject.z - guiObject.font.size(guiObject.text)[1]/2))) 283 | 284 | # Note that we no longer have updates from the socket 285 | # Now we have positions updated directly from the vector POSITIONS 286 | # Positions update before blitting onto the movable surface 287 | for i in range(len(self.guiObjectsList)): 288 | self.guiObjectsList[i].x = positions[2*i]*self.locator + int(self.locator/2) 289 | self.guiObjectsList[i].y = positions[2*i+1]*self.locator + int(self.locator/2) 290 | 291 | # Mid-level blit 292 | 293 | # Writing the message onto the message surface 294 | self.text = str(msg) 295 | self.label = self.font.render(self.text, True, (255, 255, 255)) 296 | self.message_surface.blit(self.label, (8, 4)) 297 | 298 | # Draw the grid lines 299 | for i in range(self.map_size): 300 | pygame.draw.line(self.movable_surface, GREY, (i*self.locator, 0), (i*self.locator, edge_len_pix)) 301 | 
pygame.draw.line(self.movable_surface, GREY, (0, i*self.locator), (edge_len_pix, i*self.locator)) 302 | 303 | for obj in self.guiObjectsList: 304 | # Target and Drones 305 | if obj.name == "prey": 306 | self.movable_surface.blit(obj.surface, (int(obj.x - obj.z), int(obj.y - obj.z))) 307 | elif "predator" in obj.name: 308 | self.movable_surface.blit(obj.surface, (int(obj.x - obj.z), int(obj.y - obj.z))) 309 | elif (obj.name == "center"): 310 | self.movable_surface.blit(obj.surface, (int(self.framex/2 - obj.sx/2), int(self.framey/2 - obj.sy/2))) 311 | 312 | # Top-level blit 313 | 314 | # Blitting the movable surface onto the display surface 315 | self.display_surface.blit(pygame.transform.scale(self.movable_surface, (int(self.wx), int(self.wy))), (int((self.framex - self.wx)/2 + self.sx), int((self.framey - self.wy)/2 + self.sy))) 316 | 317 | # Blitting the message surface onto the display surface 318 | self.display_surface.blit(self.message_surface, (0, 0)) 319 | 320 | if done: 321 | self.display_surface.blit(self.done_surface, (0, 0)) 322 | 323 | # Re-draw buttons 324 | self.button("PAUSE", self.vmargin, self.framey - self.vmargin - self.button_size_px, self.button_size_px, self.button_size_px, (0, 255, 0, 128), GREEN, self.btn_pause_surface) 325 | self.button("PLAY", 2*self.vmargin + self.button_size_px, self.framey - self.vmargin - self.button_size_px, self.button_size_px, self.button_size_px, (0, 255, 0, 128), GREEN, self.btn_play_surface) 326 | self.button("FF", 3*self.vmargin + 2*self.button_size_px, self.framey - self.vmargin - self.button_size_px, self.button_size_px, self.button_size_px, (0, 255, 0, 128), GREEN, self.btn_ff_surface) 327 | 328 | pygame.display.update() 329 | 330 | 331 | if self.button_value == 1: 332 | sleep(0.5) 333 | 334 | elif self.button_value == 0: 335 | self.button_value = 3 336 | while self.button_value == 3: 337 | sleep(0.1) 338 | mouse_pos = pygame.mouse.get_pos() 339 | for event in pygame.event.get(): 340 | if event.type == pygame.MOUSEBUTTONDOWN: 341 | 342 | # --- Buttons --- 343 | if self.framey - self.button_size_px - self.vmargin < mouse_pos[1] < self.framey - self.vmargin: 344 | # Pause button 345 | if self.vmargin < mouse_pos[0] < self.vmargin + self.button_size_px: 346 | if event.type == pygame.MOUSEBUTTONDOWN: 347 | self.button_press_reactor["pause"] = min(255, self.button_press_reactor["pause"] + 200) 348 | # sent = self.conn.send("pause") # TODO 349 | self.button_value = 0 350 | 351 | # Play button 352 | if 2 * self.vmargin + self.button_size_px < mouse_pos[ 353 | 0] < 2 * self.vmargin + 2 * self.button_size_px: 354 | if event.type == pygame.MOUSEBUTTONDOWN: 355 | self.button_press_reactor["play"] = min(255, self.button_press_reactor["play"] + 200) 356 | # sent = self.conn.send("play") # TODO 357 | self.button_value = 1 358 | 359 | # Fast-forward button 360 | if 3 * self.vmargin + 2 * self.button_size_px < mouse_pos[ 361 | 0] < 3 * self.vmargin + 3 * self.button_size_px: 362 | if event.type == pygame.MOUSEBUTTONDOWN: 363 | self.button_press_reactor["ff"] = min(255, self.button_press_reactor["ff"] + 200) 364 | # sent = self.conn.send("ff") # TODO 365 | self.button_value = 2 366 | 367 | 368 | return 0 369 | 370 | 371 | 372 | 373 | 374 | 375 | if __name__ == "__main__": 376 | canvas = Canvas(3, 2, 8) 377 | canvas.setup() 378 | canvas.draw(positions, schedule) 379 | 380 | -------------------------------------------------------------------------------- /Predator-Prey/envs/gui/guiObjects.py: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | # ---------------------------------------------------------------- 4 | # Purpose of Simple Simulator guiObjects is to provide abstraction 5 | # for the drawn target and drones on the Simple Simulator Canvas. 6 | # To use guiObjects, make an instance and blit it on a surface. 7 | # ---------------------------------------------------------------- 8 | 9 | import pygame 10 | from math import cos, sin, pi 11 | WHITE = (255, 255, 255) 12 | BLACK = (0, 0, 0) 13 | ORANGE = (255, 100, 0, 128) 14 | BLUE = (0, 128, 255, 128) 15 | RED = (255, 0, 0, 128) 16 | 17 | class guiPrey(): 18 | def __init__(self, xi = 1, yi = 1, zi = 30, ai = 10, prey_id = 0): 19 | # Take initial (x, y, z, radius) as argument 20 | self.x = xi 21 | self.y = yi 22 | self.z = zi 23 | # self.tr = tr 24 | self.a = ai 25 | self.a = None # Not used 26 | self.prey_id = prey_id 27 | self.name = "prey" 28 | 29 | # Label the prey 30 | self.color = ORANGE 31 | self.eye_color = RED 32 | self.fs = 15 # font size 33 | self.font = pygame.font.SysFont(pygame.font.get_default_font(), self.fs) 34 | self.text = "" 35 | self.label = self.font.render(self.text, True, BLACK) 36 | 37 | def setup(self, sx = 25, sy = 25): 38 | # Set up prey's surface 39 | self.sx = sx 40 | self.sy = sy 41 | self.surface = pygame.Surface((self.sx, self.sy), pygame.SRCALPHA) 42 | self.surface = self.surface.convert_alpha() 43 | 44 | class guiPred(): 45 | def __init__(self, xi = 1, yi = 1, zi = 30, ai = 0, pred_id = 0): 46 | # Take initial (x, y, z, yaw, drone_id) as argument 47 | self.x = xi 48 | self.y = yi 49 | self.z = zi 50 | self.a = ai 51 | self.pred_id = pred_id 52 | self.name = "predator" + str(pred_id) 53 | 54 | # Label the drone 55 | self.body_color = BLUE 56 | self.eye_color = RED 57 | self.fs = 32 # font size 58 | self.font = pygame.font.SysFont(pygame.font.get_default_font(), self.fs) 59 | self.text = str(pred_id) 60 | self.label = self.font.render(self.text, True, BLACK) 61 | 62 | # Misc. 
63 | self.eye_size = 0.4 64 | 65 | def setup(self, sx = 25, sy = 25): 66 | # Set up predator's surface 67 | self.sx = sx 68 | self.sy = sy 69 | self.surface = pygame.Surface((self.sx, self.sy), pygame.SRCALPHA) 70 | self.surface = self.surface.convert_alpha() 71 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenario.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # defines scenario upon which the world is built 4 | class BaseScenario(object): 5 | # create elements of the world 6 | def make_world(self): 7 | raise NotImplementedError() 8 | # create initial conditions of the world 9 | def reset_world(self, world): 10 | raise NotImplementedError() 11 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | 4 | 5 | def load(name): 6 | pathname = osp.join(osp.dirname(__file__), name) 7 | return imp.load_source('', pathname) 8 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/endless.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[0,1,0], 19 | [1,0,1], 20 | [0,1,0]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | def can_observe_predator(self): 27 | shape = np.shape(self._obs[:,:,0]) 28 | obs_size = shape[0]*shape[1] 29 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 30 | ret = np.shape(np.where(obs == 3))[1] > 0 31 | return ret 32 | 33 | def can_observe_two_predator(self): 34 | shape = np.shape(self._obs[:,:,0]) 35 | obs_size = shape[0]*shape[1] 36 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 37 | ret = np.shape(np.where(obs == 3))[1] > 1 38 | return ret 39 | 40 | class Predator(Agent): 41 | def __init__(self): 42 | super(Predator, self).__init__("predator", "blue") 43 | self._obs = deque(maxlen=FLAGS.history_len) 44 | self.obs_range = 1 45 | 46 | def can_observe_prey(self): 47 | shape = np.shape(self._obs) 48 | obs_size = shape[1]*shape[2] 49 | obs = np.reshape(self._obs, obs_size) 50 | ret = np.shape(np.where(obs == 4))[1] > 0 51 | return ret 52 | 53 | def update_obs(self, obs): 54 | self._obs.append(obs[:,:,0]) # use only the first channel 55 | 56 | def fill_obs(self): 57 | # fill the whole history with the current observation 58 | for i in range(FLAGS.history_len-1): 59 | self._obs.append(self._obs[-1]) 60 | 61 | class Scenario(BaseScenario): 62 | def __init__(self): 63 | self.prey_captured = False 64 | 65 | def make_world(self): 66 | world = World(width=map_size, height=map_size) 67 | 68 | agents = [] 69 | self.atype_to_idx = { 70 | "predator": [], 71 | "prey": [] 72 | } 73 | 74 | # add predators 75 | for i in xrange(n_predator): 76 | agents.append(Predator()) 77 
| self.atype_to_idx["predator"].append(i) 78 | 79 | # add preys 80 | for i in xrange(n_prey): 81 | agents.append(Prey()) 82 | self.atype_to_idx["prey"].append(n_predator + i) 83 | 84 | world.agents = agents 85 | for i, agent in enumerate(world.agents): 86 | agent.id = i + 1 87 | agent.silent = True 88 | 89 | # make initial conditions 90 | self.reset_world(world) 91 | return world 92 | 93 | def reset_world(self, world): 94 | world.empty_grid() 95 | 96 | # randomly place agent 97 | for agent in world.agents: 98 | world.placeObj(agent) 99 | 100 | world.set_observations() 101 | 102 | # fill the history with current observation 103 | for i in self.atype_to_idx["predator"]: 104 | world.agents[i].fill_obs() 105 | 106 | self.prey_captured = False 107 | 108 | def reward(self, agent, world): 109 | if agent.itype == "predator": 110 | # if self.prey_captured: 111 | # # return max(10 - world.step_cnt, 0) 112 | # return 1 113 | # else: 114 | # reward = -0.01 115 | # for i in self.atype_to_idx["prey"]: 116 | # prey = world.agents[i] 117 | # if prey.cannot_move(): 118 | # reward = 1 119 | # world.resetObj(prey) 120 | # return reward 121 | # # kdw - Use this for large map size 122 | # # if agent.can_observe_prey(): 123 | # # reward = 0.0 124 | # return reward 125 | reward = -0.001 126 | 127 | for i in self.atype_to_idx["prey"]: 128 | prey = world.agents[i] 129 | if prey.can_observe_predator(): 130 | #world.resetObj(prey) 131 | reward += 0.1/FLAGS.n_predator 132 | return reward 133 | 134 | else: # if prey 135 | if agent.cannot_move(): 136 | return -1 137 | 138 | return 0 139 | 140 | def observation(self, agent, world): 141 | # print agent.get_obs.shape 142 | obs = np.array(agent.get_obs()).flatten() 143 | return obs 144 | 145 | def done(self, agent, world): 146 | if agent.itype == "prey": 147 | if agent.can_observe_predator(): 148 | world.resetObj(agent) 149 | return False 150 | #return self.prey_captured -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/endless2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[1,1,1], 19 | [1,0,1], 20 | [1,1,1]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | def can_observe_predator(self): 27 | shape = np.shape(self._obs[:,:,0]) 28 | obs_size = shape[0]*shape[1] 29 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 30 | ret = np.shape(np.where(obs == 3))[1] > 0 31 | return ret 32 | 33 | def can_observe_two_predator(self): 34 | shape = np.shape(self._obs[:,:,0]) 35 | obs_size = shape[0]*shape[1] 36 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 37 | ret = np.shape(np.where(obs == 3))[1] > 1 38 | return ret 39 | 40 | class Predator(Agent): 41 | def __init__(self): 42 | super(Predator, self).__init__("predator", "blue") 43 | self._obs = deque(maxlen=FLAGS.history_len) 44 | self.obs_range = 1 45 | 46 | def can_observe_prey(self): 47 | shape = np.shape(self._obs) 48 | 
obs_size = shape[1]*shape[2] 49 | obs = np.reshape(self._obs, obs_size) 50 | ret = np.shape(np.where(obs == 4))[1] > 0 51 | return ret 52 | 53 | def update_obs(self, obs): 54 | self._obs.append(obs[:,:,0]) # use only the first channel 55 | 56 | def fill_obs(self): 57 | # fill the whole history with the current observation 58 | for i in range(FLAGS.history_len-1): 59 | self._obs.append(self._obs[-1]) 60 | 61 | class Scenario(BaseScenario): 62 | def __init__(self): 63 | self.prey_captured = False 64 | 65 | def make_world(self): 66 | world = World(width=map_size, height=map_size) 67 | 68 | agents = [] 69 | self.atype_to_idx = { 70 | "predator": [], 71 | "prey": [] 72 | } 73 | 74 | # add predators 75 | for i in xrange(n_predator): 76 | agents.append(Predator()) 77 | self.atype_to_idx["predator"].append(i) 78 | 79 | # add preys 80 | for i in xrange(n_prey): 81 | agents.append(Prey()) 82 | self.atype_to_idx["prey"].append(n_predator + i) 83 | 84 | world.agents = agents 85 | for i, agent in enumerate(world.agents): 86 | agent.id = i + 1 87 | agent.silent = True 88 | 89 | # make initial conditions 90 | self.reset_world(world) 91 | return world 92 | 93 | def reset_world(self, world): 94 | world.empty_grid() 95 | 96 | # randomly place agent 97 | for agent in world.agents: 98 | world.placeObj(agent) 99 | 100 | world.set_observations() 101 | 102 | # fill the history with current observation 103 | for i in self.atype_to_idx["predator"]: 104 | world.agents[i].fill_obs() 105 | 106 | self.prey_captured = False 107 | 108 | def reward(self, agent, world): 109 | if agent.itype == "predator": 110 | # if self.prey_captured: 111 | # # return max(10 - world.step_cnt, 0) 112 | # return 1 113 | # else: 114 | # reward = -0.01 115 | # for i in self.atype_to_idx["prey"]: 116 | # prey = world.agents[i] 117 | # if prey.cannot_move(): 118 | # reward = 1 119 | # world.resetObj(prey) 120 | # return reward 121 | # # kdw - Use this for large map size 122 | # # if agent.can_observe_prey(): 123 | # # reward = 0.0 124 | # return reward 125 | reward = -0.01/FLAGS.n_predator 126 | 127 | for i in self.atype_to_idx["prey"]: 128 | prey = world.agents[i] 129 | if prey.can_observe_two_predator(): 130 | #world.resetObj(prey) 131 | reward += 1.0/FLAGS.n_predator 132 | return reward 133 | 134 | else: # if prey 135 | if agent.cannot_move(): 136 | return -1 137 | 138 | return 0 139 | 140 | def observation(self, agent, world): 141 | # print agent.get_obs.shape 142 | obs = np.array(agent.get_obs()).flatten() 143 | return obs 144 | 145 | def done(self, agent, world): 146 | if agent.itype == "prey": 147 | if agent.can_observe_predator(): 148 | world.resetObj(agent) 149 | return False 150 | #return self.prey_captured -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/endless3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | n_prey1 = FLAGS.n_prey1 13 | n_prey2 = FLAGS.n_prey2 14 | map_size = FLAGS.map_size 15 | penalty = FLAGS.penalty 16 | 17 | class Prey(Agent): 18 | def __init__(self): 19 | super(Prey, self).__init__("prey", "green") 20 | self._movement_mask = np.array( 21 | [[0,1,0], 22 | [1,0,1], 23 | [0,1,0]], dtype=np.int8) 24 | 25 | def 
cannot_move(self): 26 | minimap = (self._obs[:,:,0] != 0) 27 | return np.sum(minimap*self._movement_mask)==4 28 | 29 | def can_observe_predator(self): 30 | shape = np.shape(self._obs[:,:,0]) 31 | obs_size = shape[0]*shape[1] 32 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 33 | ret = np.shape(np.where(obs == 3))[1] > 0 34 | return ret 35 | 36 | def can_observe_two_predator(self): 37 | shape = np.shape(self._obs[:,:,0]) 38 | obs_size = shape[0]*shape[1] 39 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 40 | ret = np.shape(np.where(obs == 3))[1] > 1 41 | return ret 42 | 43 | def can_observe_three_predator(self): 44 | shape = np.shape(self._obs[:,:,0]) 45 | obs_size = shape[0]*shape[1] 46 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 47 | ret = np.shape(np.where(obs == 3))[1] > 2 48 | return ret 49 | 50 | class Prey2(Agent): 51 | def __init__(self): 52 | super(Prey2, self).__init__("prey2", "red") 53 | self._movement_mask = np.array( 54 | [[0,1,0], 55 | [1,0,1], 56 | [0,1,0]], dtype=np.int8) 57 | 58 | def cannot_move(self): 59 | minimap = (self._obs[:,:,0] != 0) 60 | return np.sum(minimap*self._movement_mask)==4 61 | 62 | def can_observe_predator(self): 63 | shape = np.shape(self._obs[:,:,0]) 64 | obs_size = shape[0]*shape[1] 65 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 66 | ret = np.shape(np.where(obs == 3))[1] > 0 67 | return ret 68 | 69 | def can_observe_two_predator(self): 70 | shape = np.shape(self._obs[:,:,0]) 71 | obs_size = shape[0]*shape[1] 72 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 73 | ret = np.shape(np.where(obs == 3))[1] > 1 74 | return ret 75 | 76 | def can_observe_three_predator(self): 77 | shape = np.shape(self._obs[:,:,0]) 78 | obs_size = shape[0]*shape[1] 79 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 80 | ret = np.shape(np.where(obs == 3))[1] > 2 81 | return ret 82 | 83 | class Predator(Agent): 84 | def __init__(self): 85 | super(Predator, self).__init__("predator", "blue") 86 | self._obs = deque(maxlen=FLAGS.history_len) 87 | self.obs_range = 1 88 | 89 | def can_observe_prey(self): 90 | shape = np.shape(self._obs) 91 | obs_size = shape[1]*shape[2] 92 | obs = np.reshape(self._obs, obs_size) 93 | ret = np.shape(np.where(obs > 3))[1] > 0 94 | return ret 95 | 96 | def update_obs(self, obs): 97 | self._obs.append(obs[:,:,0]) # use only the first channel 98 | 99 | def fill_obs(self): 100 | # fill the whole history with the current observation 101 | for i in range(FLAGS.history_len-1): 102 | self._obs.append(self._obs[-1]) 103 | 104 | class Scenario(BaseScenario): 105 | def __init__(self): 106 | self.prey_captured = False 107 | 108 | def make_world(self): 109 | world = World(width=map_size, height=map_size) 110 | 111 | agents = [] 112 | self.atype_to_idx = { 113 | "predator": [], 114 | "prey": [], 115 | "prey2": [] 116 | } 117 | 118 | # add predators 119 | for i in xrange(n_predator): 120 | agents.append(Predator()) 121 | self.atype_to_idx["predator"].append(i) 122 | 123 | # add preys 124 | for i in xrange(n_prey1): 125 | agents.append(Prey()) 126 | self.atype_to_idx["prey"].append(n_predator + i) 127 | 128 | for i in xrange(n_prey2): 129 | agents.append(Prey2()) 130 | self.atype_to_idx["prey2"].append(n_predator + n_prey1 + i) 131 | 132 | world.agents = agents 133 | for i, agent in enumerate(world.agents): 134 | agent.id = i + 1 135 | agent.silent = True 136 | 137 | # make initial conditions 138 | self.reset_world(world) 139 | return 
world 140 | 141 | def reset_world(self, world): 142 | world.empty_grid() 143 | 144 | # randomly place agent 145 | for agent in world.agents: 146 | world.placeObj(agent) 147 | 148 | world.set_observations() 149 | 150 | # fill the history with current observation 151 | for i in self.atype_to_idx["predator"]: 152 | world.agents[i].fill_obs() 153 | 154 | self.prey_captured = False 155 | 156 | def reward(self, agent, world): 157 | if agent.itype == "predator": 158 | reward = 0. 159 | count = 0 160 | for i in self.atype_to_idx["prey"]: 161 | # reward += -0.01 162 | prey = world.agents[i] 163 | # if prey.can_observe_three_predator(): 164 | # reward += 10.0 165 | if prey.can_observe_predator(): 166 | reward += +1.0 167 | # print "WIN" 168 | # print "CATCH" 169 | # elif prey.can_observe_predator(): 170 | # # print "LOSE" 171 | # # reward += 0. 172 | # reward += +penalty/10. 173 | # if penalty > 10: 174 | # reward += (penalty-10)/10. 175 | # # else: 176 | # reward += +1. 177 | # if prey.can_observe_predator(): 178 | # count += 1 179 | for i in self.atype_to_idx["prey2"]: 180 | # reward += -0.01 181 | prey = world.agents[i] 182 | # if prey.can_observe_three_predator(): 183 | # reward += 10.0 184 | if prey.can_observe_two_predator(): 185 | reward += 1.0 186 | # print "WIN" 187 | # print "CATCH" 188 | elif prey.can_observe_predator(): 189 | # print "LOSE" 190 | # reward += 0. 191 | reward += -penalty/10. 192 | # if reward > 1: 193 | # print "CATCH" 194 | # if count > 1: 195 | # reward += 1.0 196 | # elif count == 1: 197 | # reward += -penalty/10. 198 | # else: 199 | # reward += 0. 200 | 201 | 202 | return reward/(n_predator) 203 | 204 | else: # if prey 205 | if agent.cannot_move(): 206 | return 0 207 | 208 | return 0 209 | 210 | def observation(self, agent, world): 211 | # print agent.get_obs.shape 212 | obs = np.array(agent.get_obs()).flatten() 213 | return obs 214 | 215 | def done(self, agent, world): 216 | if agent.itype == "prey": 217 | if agent.can_observe_predator(): 218 | # world.resetObj(agent) 219 | return True 220 | if agent.itype == "prey2": 221 | if agent.can_observe_two_predator(): 222 | # world.resetObj(agent) 223 | return True 224 | # if agent.itype == "predator": 225 | # if agent.can_observe_prey(): 226 | # # world.resetObj(agent) 227 | # return True 228 | return False 229 | #return self.prey_captured 230 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/pursuit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[0,1,0], 19 | [1,0,1], 20 | [0,1,0]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | class Predator(Agent): 27 | def __init__(self): 28 | super(Predator, self).__init__("predator", "blue") 29 | self._obs = deque(maxlen=FLAGS.history_len) 30 | self.obs_range = 1 31 | 32 | def can_observe_prey(self): 33 | shape = np.shape(self._obs) 34 | obs_size = shape[1]*shape[2] 35 | obs = np.reshape(self._obs, obs_size) 36 | 
ret = np.shape(np.where(obs == 4))[1] > 0 37 | return ret 38 | 39 | def update_obs(self, obs): 40 | self._obs.append(obs[:,:,0]) # use only the first channel 41 | 42 | def fill_obs(self): 43 | # fill the whole history with the current observation 44 | for i in range(FLAGS.history_len-1): 45 | self._obs.append(self._obs[-1]) 46 | 47 | class Scenario(BaseScenario): 48 | def __init__(self): 49 | self.prey_captured = False 50 | 51 | def make_world(self): 52 | world = World(width=map_size, height=map_size) 53 | 54 | agents = [] 55 | self.atype_to_idx = { 56 | "predator": [], 57 | "prey": [] 58 | } 59 | 60 | # add predators 61 | for i in xrange(n_predator): 62 | agents.append(Predator()) 63 | self.atype_to_idx["predator"].append(i) 64 | 65 | # add preys 66 | for i in xrange(n_prey): 67 | agents.append(Prey()) 68 | self.atype_to_idx["prey"].append(n_predator + i) 69 | 70 | world.agents = agents 71 | for i, agent in enumerate(world.agents): 72 | agent.id = i + 1 73 | agent.silent = True 74 | 75 | # make initial conditions 76 | self.reset_world(world) 77 | return world 78 | 79 | def reset_world(self, world): 80 | world.empty_grid() 81 | 82 | # randomly place agent 83 | for agent in world.agents: 84 | world.placeObj(agent) 85 | 86 | world.set_observations() 87 | 88 | # fill the history with current observation 89 | for i in self.atype_to_idx["predator"]: 90 | world.agents[i].fill_obs() 91 | 92 | self.prey_captured = False 93 | 94 | def reward(self, agent, world): 95 | if agent.itype == "predator": 96 | if self.prey_captured: 97 | # return max(10 - world.step_cnt, 0) 98 | return 1 99 | else: 100 | reward = -0.01 101 | for i in self.atype_to_idx["prey"]: 102 | prey = world.agents[i] 103 | if prey.cannot_move(): 104 | # print "captured" 105 | self.prey_captured = True 106 | reward = 1 107 | return reward 108 | # kdw - Use this for large map size 109 | # if agent.can_observe_prey(): 110 | # reward = 0.0 111 | return reward 112 | else: # if prey 113 | if agent.cannot_move(): 114 | return -1 115 | 116 | return 0 117 | 118 | def observation(self, agent, world): 119 | # print agent.get_obs.shape 120 | obs = np.array(agent.get_obs()).flatten() 121 | return obs 122 | 123 | def done(self, agent, world): 124 | return self.prey_captured -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/single_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from envs.scenarios.pursuit import Scenario as BaseScenario 3 | import config 4 | 5 | FLAGS = config.flags.FLAGS 6 | map_size = FLAGS.map_size 7 | 8 | class Scenario(BaseScenario): 9 | def __init__(self): 10 | super(Scenario, self).__init__() 11 | print "Single agent scenario" 12 | 13 | def reset_world(self, world): 14 | world.empty_grid() 15 | 16 | prey_pos = [0, 0] 17 | 18 | prey_idx = self.atype_to_idx["prey"][0] 19 | world.placeObj(world.agents[prey_idx], top=prey_pos, size=(1,1)) 20 | 21 | top = ((prey_pos[0]+1)%map_size, (prey_pos[1]+1)%map_size) 22 | 23 | world.placeObj(world.agents[0], top=top, size=(2, 2)) 24 | world.placeObj(world.agents[1], top=[0, 1], size=(1, 1)) 25 | 26 | world.set_observations() 27 | 28 | # fill the history with current observation 29 | for i in self.atype_to_idx["predator"]: 30 | world.agents[i].fill_obs() 31 | 32 | self.prey_captured = False 33 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/static_prey.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from envs.scenarios.pursuit import Scenario as BaseScenario 3 | import config 4 | 5 | FLAGS = config.flags.FLAGS 6 | map_size = FLAGS.map_size 7 | 8 | class Scenario(BaseScenario): 9 | def __init__(self): 10 | super(Scenario, self).__init__() 11 | 12 | def reset_world(self, world): 13 | world.empty_grid() 14 | 15 | # prey_pos = np.random.choice([map_size - 1, 0], 2) 16 | prey_pos = [0, 0] 17 | prey_idx = self.atype_to_idx["prey"][0] 18 | world.placeObj(world.agents[prey_idx], top=prey_pos, size=(1,1)) 19 | 20 | top = ((prey_pos[0]+1)%map_size, (prey_pos[1]+1)%map_size) 21 | for idx in self.atype_to_idx["predator"]: 22 | world.placeObj(world.agents[idx], top=top, size=(2,2)) 23 | 24 | world.set_observations() 25 | 26 | # fill the history with current observation 27 | for i in self.atype_to_idx["predator"]: 28 | world.agents[i].fill_obs() 29 | 30 | self.prey_captured = False 31 | -------------------------------------------------------------------------------- /Predator-Prey/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | import logging 4 | import make_env 5 | import agents 6 | import config 7 | import time 8 | import random 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | FLAGS = config.flags.FLAGS 13 | 14 | def set_seed(seed): 15 | """Initialized the random seeds 16 | """ 17 | random.seed(seed) 18 | np.random.seed(seed) 19 | tf.set_random_seed(seed) 20 | return None 21 | 22 | 23 | if __name__ == '__main__': 24 | 25 | seed = FLAGS.seed 26 | set_seed(seed) 27 | print 'SEED', seed 28 | 29 | # === Logging setup === # 30 | logger_env = logging.getLogger('GridMARL') 31 | logger_agent = logging.getLogger('Agent') 32 | 33 | # === Program start === # 34 | # Load environment 35 | env = make_env.make_env(FLAGS.scenario) 36 | logger_env.info('GridMARL Start with %d predator(s) and %d prey(s)', FLAGS.n_predator, FLAGS.n_prey) 37 | 38 | # Load trainer 39 | logger_agent.info('Agent: {}'.format(FLAGS.agent)) 40 | trainer = agents.load(FLAGS.agent+"/trainer.py").Trainer(env) 41 | 42 | print FLAGS.agent, config.file_name 43 | 44 | # start learning 45 | if FLAGS.train: 46 | start_time = time.time() 47 | trainer.learn() 48 | finish_time = time.time() 49 | # trainer.test() 50 | print "TRAINING TIME (sec)", finish_time - start_time 51 | else: 52 | trainer.test() 53 | -------------------------------------------------------------------------------- /Predator-Prey/make_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for creating a multiagent environment with one of the scenarios listed 3 | in ./scenarios/. 4 | Can be called by using, for example: 5 | env = make_env('simple_speaker_listener') 6 | After producing the env object, can be used similarly to an OpenAI gym 7 | environment. 8 | 9 | A policy using this environment must output actions in the form of a list 10 | for all agents. Each element of the list should be a numpy array, 11 | of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede 12 | communication actions in this array. See environment.py for more details. 13 | """ 14 | 15 | def make_env(scenario_name, benchmark=False): 16 | ''' 17 | Creates a MultiAgentEnv object as env. This can be used similar to a gym 18 | environment by calling env.reset() and env.step(). 19 | Use env.render() to view the environment on the screen. 
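    A minimal usage sketch (a sketch only: the scenario modules read their
    settings from config.py at import time, so the command-line flags must be
    set up first, as in main.py or test.py; 'pursuit' is one of the scenarios
    shipped in ./scenarios/):

        env = make_env('pursuit')
        obs_n = env.reset()
        # one action per agent; action 2 keeps an agent in place in the grid world
        obs_n, reward_n, done_n, info_n = env.step([2] * len(env.world.agents))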
20 | 21 | Input: 22 | scenario_name : name of the scenario from ./scenarios/ to be Returns 23 | (without the .py extension) 24 | benchmark : whether you want to produce benchmarking data 25 | (usually only done during evaluation) 26 | 27 | Some useful env properties (see environment.py): 28 | .observation_space : Returns the observation space for each agent 29 | .action_space : Returns the action space for each agent 30 | .n : Returns the number of Agents 31 | ''' 32 | from envs.environment import MultiAgentEnv 33 | import envs.scenarios as scenarios 34 | 35 | # load scenario from script 36 | scenario = scenarios.load(scenario_name + ".py").Scenario() 37 | # create world 38 | world = scenario.make_world() 39 | # create multiagent environment 40 | if benchmark: 41 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, scenario.benchmark_data) 42 | else: 43 | env = MultiAgentEnv(world, reset_callback=scenario.reset_world, 44 | reward_callback=scenario.reward, 45 | observation_callback=scenario.observation, 46 | done_callback=scenario.done) 47 | return env 48 | -------------------------------------------------------------------------------- /Predator-Prey/readme: -------------------------------------------------------------------------------- 1 | Training 2 | 3 | $algorithm = vdn, qmix, pqmix5(=QTRAN-alt in the paper), pqmix7(=QTRAN in the paper) 4 | 5 | (i) 2 Predator & 1 Prey (5X5 Map) with P=0.5 6 | 7 | python main.py --scenario endless3 --n_predator 2 --n_prey1 0 --n_prey2 1 --n_prey 1 --map_size 5 --agent pos_cac_fo --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 600000 --df 0.99 --eval_step 100 --algorithm $algorithm --lr 0.0005 --seed 0 --penalty 5 --comment 215 8 | 9 | (ii) 4 Predator & 2 Prey (7X7 Map) with P=0.5 10 | 11 | python main.py --scenario endless3 --n_predator 4 --n_prey1 0 --n_prey2 2 --n_prey 2 --map_size 7 --agent pos_cac_fo --training_step 6000000 --testing_step 10000 --max_step 100 --b_size 1000000 --df 0.99 --eval_step 100 --algorithm $algorithm --lr 0.0005 --seed 0 --penalty 5 --comment 427 & 12 | 13 | 14 | -------------------------------------------------------------------------------- /Predator-Prey/run_DQN9.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | GPU=$1 4 | #for penalty in 0 2 4 6 8 10 12 14 5 | for seed in 28 29 30 31 32 #12 13 6 | #for penalty in 5 10 15 7 | do 8 | for penalty in 5 9 | do 10 | 11 | #CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 2 --n_prey1 0 --n_prey2 1 --n_prey 1 --map_size 5 --agent $2 --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 600000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0005 --seed $seed --penalty $penalty --comment "$4"215 & 12 | 13 | 14 | 15 | 16 | 17 | 18 | #for seed in 401 402 403 404 405 19 | #do 20 | 21 | #CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 3 --n_prey1 0 --n_prey2 2 --n_prey 2 --map_size 6 --agent $2 --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 600000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0005 --seed $seed --penalty $penalty --comment "$4"326 & 22 | 23 | CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 4 --n_prey1 0 --n_prey2 2 --n_prey 2 --map_size 7 --agent $2 --training_step 6000000 --testing_step 10000 --max_step 100 --b_size 1000000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0005 --seed $seed --penalty $penalty --comment "$4"427 & 24 | 25 | 26 | # 
CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 3 --n_prey 2 --map_size 7 --agent $2 --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 500000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0001 --seed $seed --penalty $penalty --beta $4 --comment "$4"-326 & 27 | 28 | #CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 4 --n_prey 2 --map_size 8 --agent $2 --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 500000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0001 --seed $seed --penalty $penalty --comment "$4"428 & 29 | 30 | #done 31 | 32 | done 33 | done 34 | -------------------------------------------------------------------------------- /Predator-Prey/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os,sys 3 | sys.path.insert(1, os.path.join(sys.path[0], '..')) 4 | import argparse 5 | 6 | from envs.environment import MultiAgentEnv 7 | import envs.scenarios as scenarios 8 | import numpy as np 9 | import config 10 | 11 | FLAGS = config.flags.FLAGS 12 | 13 | 14 | if __name__ == '__main__': 15 | # parse arguments 16 | parser = argparse.ArgumentParser(description=None) 17 | parser.add_argument('-s', '--scenario', default='pursuit.py', help='Path of the scenario Python script.') 18 | args = parser.parse_args() 19 | 20 | # load scenario from script 21 | scenario = scenarios.load(args.scenario).Scenario() 22 | # create world 23 | world = scenario.make_world() 24 | # create multiagent environment 25 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, done_callback=scenario.done) 26 | act_n = [2, 2] 27 | print "action space:", env.action_space[0].n 28 | print "observation space:", env.observation_space 29 | 30 | obs_n = env.reset()[:2] 31 | print env.get_agent_profile() 32 | print env.get_full_encoding()[:, :, 2] 33 | imap = np.array(obs_n).reshape((2, FLAGS.history_len,3,3,1)) 34 | 35 | minimap = imap[:,:,:,:,0] 36 | print minimap[0, -1] 37 | print minimap[1, -1] 38 | 39 | while True: 40 | a0 = input("action of agent 0:") 41 | a1 = input("action of agent 1:") 42 | act_n = [a0, a1, 2] 43 | obs_n, reward_n, done_n, info_n = env.step(act_n) 44 | obs_n = obs_n[:2] 45 | 46 | 47 | print env.get_full_encoding()[:,:,2] 48 | imap = np.array(obs_n).reshape((2, FLAGS.history_len,3,3,1)) 49 | 50 | minimap = imap[:,:,:,:,0] 51 | print minimap[0, -1] 52 | print minimap[1, -1] 53 | 54 | 55 | print reward_n, done_n 56 | 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # QTRAN: Learning to Factorize with Transformation for Cooperative Multi-Agent Reinforcement Learning 2 | 3 | There will be additional updates later 4 | 5 | ## Predator-prey 6 | 7 | Training 8 | 9 | $algorithm = vdn, qmix, pqmix5(=QTRAN-alt in the paper), pqmix7(=QTRAN in the paper) 10 | 11 | (i) 2 Predator & 1 Prey (5X5 Map) with P=0.5 12 | 13 | python main.py --scenario endless3 --n_predator 2 --n_prey1 0 --n_prey2 1 --n_prey 1 --map_size 5 --agent pos_cac_fo --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 600000 --df 0.99 --eval_step 100 --algorithm $algorithm --lr 0.0005 --seed 0 --penalty 5 --comment 215 14 | 15 | (ii) 4 Predator & 2 Prey (7X7 Map) with P=0.5 16 | 17 | python main.py --scenario endless3 --n_predator 4 --n_prey1 0 --n_prey2 2 --n_prey 2 --map_size 7 --agent pos_cac_fo 
--training_step 6000000 --testing_step 10000 --max_step 100 --b_size 1000000 --df 0.99 --eval_step 100 --algorithm $algorithm --lr 0.0005 --seed 0 --penalty 5 --comment 427 & 18 | 19 | 20 | ## Others 21 | 22 | Training 23 | 24 | $algorithm = vdn, qmix, pqmix5(=QTRAN-alt in the paper), pqmix7(=QTRAN in the paper) 25 | 26 | python main.py --agent pos_cac_fo --training_step 10000 --b_size 10000 --m_size 32 --seed 0 --algorithm $algorithm --penalty 0 27 | 28 | 29 | In make_env.py 30 | 31 | (i) Matrix game 32 | 33 | from envs.environment import MultiAgentSimpleEnv2 as MAS 34 | 35 | (ii) Gaussian Squeeze 36 | 37 | from envs.environment import MultiAgentSimpleEnv4 as MAS 38 | --------------------------------------------------------------------------------
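A note on the --penalty flag used in the Predator-Prey commands above: in envs/scenarios/endless3.py it enters the predator reward as reward += -penalty/10., and the accumulated reward is divided by n_predator before being returned, which is presumably how --penalty 5 realises the P=0.5 setting mentioned above. A small illustrative calculation (hypothetical numbers, not taken from the repository):

    # Per-predator reward around a single "prey2" in endless3, following Scenario.reward()
    n_predator = 2
    penalty = 5                                  # --penalty 5  ->  P = penalty / 10. = 0.5
    lone_catch = (-penalty / 10.) / n_predator   # only one predator adjacent: -0.25 each
    joint_catch = 1.0 / n_predator               # both predators adjacent:    +0.5 each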