├── .gitignore
├── Others
│   ├── agents
│   │   ├── __init__.py
│   │   ├── config_agents.py
│   │   ├── evaluation.py
│   │   ├── pos_cac_fo
│   │   │   ├── __init__.py
│   │   │   ├── agent.py
│   │   │   ├── dq_network.py
│   │   │   ├── replay_buffer.py
│   │   │   └── trainer.py
│   │   ├── replay_buffer.py
│   │   └── simple_agent.py
│   ├── config.py
│   ├── envs
│   │   ├── #environment.py#
│   │   ├── __init__.py
│   │   ├── config_env.py
│   │   ├── environment.py
│   │   ├── grid_core.py
│   │   ├── scenario.py
│   │   └── scenarios
│   │       ├── __init__.py
│   │       ├── endless.py
│   │       ├── pursuit.py
│   │       ├── single_agent.py
│   │       └── static_prey.py
│   ├── main.py
│   ├── make_env.py
│   ├── readme
│   ├── run_DQN2.sh
│   └── test.py
├── Predator-Prey
│   ├── agents
│   │   ├── __init__.py
│   │   ├── config_agents.py
│   │   ├── evaluation.py
│   │   ├── non_simple_agent.py
│   │   ├── pos_cac_fo
│   │   │   ├── __init__.py
│   │   │   ├── agent.py
│   │   │   ├── dq_network.py
│   │   │   ├── replay_buffer.py
│   │   │   └── trainer.py
│   │   ├── replay_buffer.py
│   │   └── simple_agent.py
│   ├── config.py
│   ├── envs
│   │   ├── __init__.py
│   │   ├── config_env.py
│   │   ├── environment.py
│   │   ├── grid_core.py
│   │   ├── gui
│   │   │   ├── __init__.py
│   │   │   ├── canvas.py
│   │   │   └── guiObjects.py
│   │   ├── scenario.py
│   │   └── scenarios
│   │       ├── __init__.py
│   │       ├── endless.py
│   │       ├── endless2.py
│   │       ├── endless3.py
│   │       ├── pursuit.py
│   │       ├── single_agent.py
│   │       └── static_prey.py
│   ├── main.py
│   ├── make_env.py
│   ├── readme
│   ├── run_DQN9.sh
│   └── test.py
└── README.md
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Others/agents/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | import logging 4 | 5 | 6 | def load(name): 7 | pathname = osp.join(osp.dirname(__file__), name) 8 | return imp.load_source('', pathname) 9 | 10 | 11 | logger_agent = logging.getLogger('Agent') 12 | logger_agent.setLevel(logging.INFO) 13 | fh_agent = logging.FileHandler('./agent.log') 14 | sh = logging.StreamHandler() 15 | fm = logging.Formatter('[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > [%(name)s] %(message)s') 16 | fh_agent.setFormatter(fm) 17 | sh.setFormatter(fm) 18 | logger_agent.addHandler(fh_agent) 19 | logger_agent.addHandler(sh) 20 | -------------------------------------------------------------------------------- /Others/agents/config_agents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | # import agents 4 | 5 | 6 | def config_agent(_flags): 7 | flags = _flags 8 | 9 | flags.DEFINE_string("agent", "cac_fo", "Agent") 10 | 11 | flags.DEFINE_integer("training_step", 10000, "Training time step") 12 | flags.DEFINE_integer("testing_step", 1, "Testing time step") 13 | flags.DEFINE_integer("max_step", 200, "Maximum time step per episode") 14 | flags.DEFINE_integer("eval_step", 100, "Number of steps before training") 15 | # flags.DEFINE_integer("training_step", 5000, "Training time step") 16 | # flags.DEFINE_integer("testing_step", 1000, "Testing time step") 17 | # flags.DEFINE_integer("max_step", 200, "Maximum time step per episode") 18 | # flags.DEFINE_integer("eval_step", 1000, "Number of steps before training") 19 | 20 | flags.DEFINE_integer("b_size", 50000, "Size of the replay memory") 21 | flags.DEFINE_integer("m_size", 64, "Minibatch size") 22 | flags.DEFINE_integer("pre_train_step", 10, "during [m_size * pre_step] take random action") 23 | flags.DEFINE_float("lr", 0.0005, "Learning rate") 24 | # flags.DEFINE_float("lr", 0.01, "Learning rate") # it is for single 25 | flags.DEFINE_float("df", 0.99, "Discount factor") 26 | 27 | flags.DEFINE_boolean("load_nn", False, "Load nn from file or not") 28 | flags.DEFINE_string("nn_file", "results/nn/s", "The name of file for loading") 29 | 30 | flags.DEFINE_boolean("train", True, "Training or testing") 
31 | flags.DEFINE_boolean("qtrace", False, "Use q trace") 32 | flags.DEFINE_boolean("kt", False, "Keyboard input test") 33 | flags.DEFINE_boolean("use_action_in_critic", False, "Use guided samples") 34 | flags.DEFINE_string("algorithm", "ddd", "algorithm") 35 | flags.DEFINE_string("epsilon", "Yes", "Use eps-greedy decreasing method (or other options can be added") 36 | 37 | 38 | 39 | 40 | def get_filename(): 41 | import config 42 | FLAGS = config.flags.FLAGS 43 | 44 | return "a-"+FLAGS.agent+"-lr-"+str(FLAGS.lr)+"-ms-"+str(FLAGS.m_size)+"-algorithm-"+str(FLAGS.algorithm) -------------------------------------------------------------------------------- /Others/agents/evaluation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | import numpy as np 5 | import logging 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | result = logging.getLogger('Result') 10 | 11 | class Evaluation(object): 12 | 13 | def __init__(self): 14 | self.episode_cnt = 0 15 | self.m = dict() 16 | 17 | def update_value(self, m_key, m_value, m_append=None): 18 | if m_key in self.m: 19 | self.m[m_key]['value'] += m_value 20 | self.m[m_key]['cnt'] += 1 21 | else: 22 | self.m[m_key] = dict() 23 | self.m[m_key]['value'] = m_value 24 | self.m[m_key]['cnt'] = 1 25 | if m_append is None: 26 | result.info(m_key + "\t" + str(m_value)) 27 | else: 28 | result.info(m_key + "\t" + str(m_value) + "\t" + str(m_append)) 29 | 30 | def summarize(self, key=None): 31 | if key is None: 32 | for k in self.m: 33 | print "Average", k, float(self.m[k]['value'])/self.m[k]['cnt'] 34 | result.info("summary\t" + k + "\t" + str(float(self.m[k]['value']) / self.m[k]['cnt'])) 35 | 36 | elif key not in self.m: 37 | print "Wrong key" 38 | 39 | else: 40 | print "Average", key, float(self.m[key]['value']) / self.m[key]['cnt'] 41 | result.info("summary\t" + key + "\t" + str(float(self.m[key]['value'])/self.m[key]['cnt'])) 42 | -------------------------------------------------------------------------------- /Others/agents/pos_cac_fo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sonkyunghwan/QTRAN/785c44ebc8379896dc9f513af2ac767d61013914/Others/agents/pos_cac_fo/__init__.py -------------------------------------------------------------------------------- /Others/agents/pos_cac_fo/agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | """ 5 | =========================================== 6 | :mod:`qlearn` Q-Learning 7 | =========================================== 8 | 9 | 설명 10 | ===== 11 | 12 | Choose action based on q-learning algorithm 13 | """ 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | import math 18 | from agents.pos_cac_fo.dq_network import * 19 | from agents.pos_cac_fo.replay_buffer import * 20 | from agents.evaluation import Evaluation 21 | import matplotlib 22 | matplotlib.use('Agg') 23 | import matplotlib.pyplot as plt 24 | import matplotlib.animation as animation 25 | 26 | import logging 27 | import config 28 | 29 | FLAGS = config.flags.FLAGS 30 | logger = logging.getLogger("Agent") 31 | result = logging.getLogger('Result') 32 | 33 | 34 | class Agent(object): 35 | 36 | def __init__(self, action_dim, obs_dim, name=""): 37 | logger.info("Centralized DQN Agent") 38 | 39 | 40 | self._obs_dim = obs_dim 41 | self._n_player = FLAGS.n_predator 42 | self._action_dim = action_dim * 
self._n_player 43 | self._action_dim_single = action_dim 44 | self._state_dim = obs_dim 45 | 46 | self._name = name 47 | self.update_cnt = 0 48 | self.target_update_period = 3000 49 | 50 | self.df = FLAGS.df 51 | self.lr = FLAGS.lr 52 | 53 | # Make Q-network 54 | tf.reset_default_graph() 55 | my_graph = tf.Graph() 56 | 57 | with my_graph.as_default(): 58 | self.sess = tf.Session(graph=my_graph, config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) 59 | self.q_network = DQNetwork(self.sess, self._state_dim, self._action_dim_single, self._n_player) 60 | self.sess.run(tf.global_variables_initializer()) 61 | self.saver = tf.train.Saver() 62 | if FLAGS.load_nn: 63 | print "LOAD!" 64 | self.saver.restore(self.sess, FLAGS.nn_file) 65 | 66 | self.replay_buffer = ReplayBuffer() 67 | 68 | self._eval = Evaluation() 69 | self.q_prev = None 70 | 71 | self.ims = [] 72 | plt.clf() 73 | self.fig = plt.figure() 74 | self.axes = plt.gca() 75 | plt.xticks(list(range(0,25,5))) 76 | plt.yticks(list(range(0,25,5))) 77 | self.axes.tick_params(axis='both',labelsize = 15) 78 | 79 | 80 | 81 | def act(self, state): 82 | state_i = state 83 | s = np.reshape(state_i, self._state_dim) 84 | 85 | action = self.q_network.get_action(s[None])[0] 86 | 87 | return action 88 | 89 | def train(self, state, action, reward, state_n, done): 90 | 91 | a = self.action_to_onehot(action) 92 | s = state 93 | s_n = state_n 94 | r = np.sum(reward) 95 | 96 | self.store_sample(s, a, r, s_n, done) 97 | self.update_network() 98 | return 0 99 | 100 | def store_sample(self, s, a, r, s_n, done): 101 | self.replay_buffer.add_to_memory((s, a, r, s_n, done)) 102 | return 0 103 | 104 | def update_network(self): 105 | self.update_cnt += 1 106 | if len(self.replay_buffer.replay_memory) < FLAGS.m_size * FLAGS.pre_train_step: 107 | return 0 108 | 109 | minibatch = self.replay_buffer.sample_from_memory() 110 | self.q_network.training_qnet(minibatch) 111 | 112 | if self.update_cnt % self.target_update_period == 0: 113 | self.q_network.training_target_qnet() 114 | if FLAGS.qtrace: 115 | if self.update_cnt % 10000 == 0: 116 | self.q_diff() 117 | 118 | if self.update_cnt % 10000 == 0: 119 | self.saver.save(self.sess, config.nn_filename, self.update_cnt) 120 | 121 | return 0 122 | 123 | def state_to_index(self, state): 124 | """ 125 | For the single agent case, the state is only related to the position of agent 1 126 | :param state: 127 | :return: 128 | """ 129 | ret = np.zeros(self._state_dim) 130 | for i in range(FLAGS.n_predator + FLAGS.n_prey): 131 | p = np.argwhere(np.array(state)==i+1)[0] 132 | ret[2 * i] = (p[0] - FLAGS.map_size /2.) / FLAGS.map_size 133 | ret[2 * i + 1] = (p[1] - FLAGS.map_size /2.) 
/ FLAGS.map_size 134 | 135 | 136 | return ret 137 | 138 | def get_predator_pos(self, state): 139 | """ 140 | return position of agent 1 and 2 141 | :param state: input is state 142 | :return: 143 | """ 144 | state_list = list(np.array(state).ravel()) 145 | return state_list.index(1), state_list.index(2) 146 | 147 | def get_pos_by_id(self, state, id): 148 | state_list = list(np.array(state).ravel()) 149 | return state_list.index(id) 150 | 151 | def onehot(self, index, size): 152 | n_hot = np.zeros(size) 153 | n_hot[index] = 1.0 154 | return n_hot 155 | 156 | def index_to_action(self, index): 157 | action_list = [] 158 | for i in range(FLAGS.n_predator-1): 159 | action_list.append(index%5) 160 | index = index/5 161 | action_list.append(index) 162 | return action_list 163 | 164 | def action_to_index(self, action): 165 | index = 0 166 | for i in range(FLAGS.n_predator): 167 | index += action[i] * 5 ** i 168 | return index 169 | 170 | def action_to_onehot(self, action): 171 | onehot = np.zeros([self._n_player, self._action_dim_single]) 172 | for i in range(self._n_player): 173 | onehot[i, action[i]] = 1 174 | return onehot 175 | 176 | def q_diff(self): 177 | 178 | 179 | print self.update_cnt 180 | 181 | def q(self, mode, step =0): 182 | 183 | if mode == 0: 184 | q_value = np.zeros([2,2]) 185 | for i in range(2): 186 | for j in range(2): 187 | s = np.array([10]) 188 | action = [i,j] 189 | a = self.action_to_onehot(action) 190 | q_value[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 191 | 192 | 193 | return q_value#, qi_value, p_value 194 | 195 | if mode == 1: 196 | q_value = np.zeros([3,3]) 197 | for i in range(3): 198 | for j in range(3): 199 | s = np.array([1]) 200 | action = [i,j] 201 | a = self.action_to_onehot(action) 202 | q_value[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 203 | 204 | qi_value = self.q_network.get_qp_values(s[None])[0] 205 | 206 | q_value2 = np.zeros([3,3]) 207 | for i in range(3): 208 | for j in range(3): 209 | s = np.array([2]) 210 | action = [i,j] 211 | a = self.action_to_onehot(action) 212 | q_value2[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 213 | 214 | qi_value2 = self.q_network.get_qp_values(s[None])[0] 215 | 216 | 217 | 218 | return q_value, qi_value, q_value2, qi_value2 219 | 220 | if mode == 2: 221 | q_value = np.zeros([11]) 222 | for i in range(11): 223 | s = np.ones(10) * 0.5 224 | a = np.zeros([10,2]) 225 | a[:10-i,0] = 1. 226 | a[10-i:,1] = 1. 
227 | q_value[i] = self.q_network.get_q_values(s[None],a[None])[0] 228 | 229 | return q_value 230 | 231 | 232 | elif mode == 3: 233 | q_value_1 = np.zeros([2,2]) 234 | q_value_2 = np.zeros([2,2]) 235 | q_value_3 = np.zeros([2,2]) 236 | for i in range(2): 237 | for j in range(2): 238 | s = np.array([0]) 239 | action = [i,j] 240 | a = self.action_to_onehot(action) 241 | q_value_1[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 242 | 243 | q_value_2 = np.zeros([2,2]) 244 | for i in range(2): 245 | for j in range(2): 246 | s = np.array([1]) 247 | action = [i,j] 248 | a = self.action_to_onehot(action) 249 | q_value_2[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 250 | 251 | q_value_3 = np.zeros([2,2]) 252 | for i in range(2): 253 | for j in range(2): 254 | s = np.array([2]) 255 | action = [i,j] 256 | a = self.action_to_onehot(action) 257 | q_value_3[i,j] = self.q_network.get_q_values(s[None],a[None])[0] 258 | 259 | return q_value_1, q_value_2, q_value_3 260 | 261 | 262 | elif mode == 4: 263 | samples = 1000 264 | x = np.zeros(samples) 265 | y = np.zeros(samples) 266 | z = np.zeros(samples) 267 | for i in range (samples): 268 | act_n = [] 269 | for j in range(self._n_player): 270 | action = np.random.choice(self._action_dim_single) 271 | act_n.append(action) 272 | # act_n = np.array(list(bin(int(i))[2:].zfill(8)),dtype='int') 273 | a = self.action_to_onehot(act_n) 274 | s = np.ones(self._n_player) * 0.1 275 | x[i] = np.sum(np.array(act_n) * s) 276 | y[i] = self.q_network.get_q_values(s[None],a[None])[0] 277 | z[i] = self.q_network.get_qp_values(s[None],a[None])[0] 278 | 279 | order = np.argsort(x) 280 | xs = np.array(x)[order] 281 | ys = np.array(y)[order] 282 | zs = np.array(z)[order] 283 | 284 | np.save(config.file_name + "1", xs) 285 | np.save(config.file_name + "2", ys) 286 | np.save(config.file_name + "3", zs) 287 | 288 | plt.scatter(xs, ys) 289 | plt.xlim(0, 2.5) 290 | plt.ylim(0, 18) 291 | plt.xlabel('State-Action Fair') 292 | plt.ylabel('Q-value') 293 | plt.savefig(config.file_name + '-A1.png') 294 | 295 | plt.clf() 296 | 297 | plt.plot(xs, ys) 298 | plt.xlim(0, 2.5) 299 | plt.ylim(0, 18) 300 | plt.xlabel('State-Action Fair') 301 | plt.ylabel('Q-value') 302 | plt.savefig(config.file_name + '-A2.png') 303 | 304 | plt.clf() 305 | 306 | plt.scatter(xs, zs) 307 | plt.xlim(0, 2.5) 308 | plt.ylim(0, 18) 309 | plt.xlabel('State-Action Fair') 310 | plt.ylabel('Q-value') 311 | plt.savefig(config.file_name + '-B1.png') 312 | 313 | plt.clf() 314 | 315 | plt.plot(xs, zs) 316 | plt.xlim(0, 2.5) 317 | plt.ylim(0, 18) 318 | plt.xlabel('State-Action Fair') 319 | plt.ylabel('Q-value') 320 | plt.savefig(config.file_name + '-B2.png') 321 | 322 | return "FINISH" 323 | 324 | elif mode == 5: 325 | s = np.array([1]) 326 | Q_matrix = np.zeros([21,21]) 327 | Q_matrix2 = np.zeros([21,21]) 328 | for i in range(21): 329 | for j in range(21): 330 | act_n = np.array([i,j]) 331 | a = self.action_to_onehot(act_n) 332 | Q_matrix[i,j] = np.mean(self.q_network.get_q_values(s[None],a[None])) 333 | Q_matrix2[i,j] = np.mean(self.q_network.get_qp_values(s[None],a[None])) 334 | optimal_action = self.q_network.get_action(s[None])[0] 335 | 336 | ind = np.unravel_index(np.argmax(Q_matrix, axis=None), Q_matrix.shape) 337 | print 'optimal_action', optimal_action, np.mean(self.q_network.get_q_values(s[None],self.action_to_onehot(optimal_action)[None])) 338 | print 'ind', ind, self.q_network.get_q_values(s[None],self.action_to_onehot(ind)[None])[0] 339 | 340 | # plt.clf() 341 | # self.fig = plt.figure(figsize=(4,4)) 
342 | # self.ims = [] 343 | title = self.axes.text(0.5,1.05,"Step {}".format(step), 344 | size=plt.rcParams["axes.titlesize"], 345 | ha="center", transform=self.axes.transAxes, ) 346 | print "ADD!" 347 | self.ims.append([plt.pcolor(Q_matrix2,vmin=-10, vmax=10), title]) 348 | 349 | 350 | 351 | return Q_matrix, Q_matrix2 352 | 353 | elif mode == 6: 354 | im_ani = animation.ArtistAnimation(self.fig, self.ims, interval=200, #repeat_delay=3000, 355 | blit=False) 356 | im_ani.save(str(FLAGS.algorithm)+'.gif', dpi=80, writer='imagemagick') 357 | return True 358 | 359 | 360 | elif mode == 5: 361 | 362 | s = np.array([1] * FLAGS.n_predator) 363 | optimal_action = self.q_network.get_action(s[None])[0] 364 | r = np.sum(np.array(optimal_action))/10. 365 | 366 | x = np.linspace(0, 10, 1000) 367 | y = np.array([r * np.exp( -np.square(r-5)/1) + r * np.exp(-np.square(r-8)/0.25) for r in x]) 368 | # a = np.load("1-" + "pqmix5" + "-" + str((i+1)*1000)+".npy") 369 | x2 = np.sum(np.array(optimal_action))/10. 370 | y2 = x2 * np.exp( -np.square(x2-5)/1) + x2 * np.exp(-np.square(x2-8)/0.25) 371 | im = plt.plot(x,y,'black') 372 | im2 = plt.plot([x2],[y2],'ro') 373 | title = self.axes.text(0.5,1.05,"Step {}".format(step), 374 | size=plt.rcParams["axes.titlesize"], 375 | ha="center", transform=self.axes.transAxes, ) 376 | self.ims.append(im2) 377 | 378 | 379 | 380 | s = np.array([1]) 381 | Q_matrix = np.zeros([21,21]) 382 | Q_matrix2 = np.zeros([21,21]) 383 | for i in range(21): 384 | for j in range(21): 385 | act_n = np.array([i,j]) 386 | a = self.action_to_onehot(act_n) 387 | Q_matrix[i,j] = np.mean(self.q_network.get_q_values(s[None],a[None])) 388 | Q_matrix2[i,j] = np.mean(self.q_network.get_qp_values(s[None],a[None])) 389 | optimal_action = self.q_network.get_action(s[None])[0] 390 | 391 | ind = np.unravel_index(np.argmax(Q_matrix, axis=None), Q_matrix.shape) 392 | print 'optimal_action', optimal_action, np.mean(self.q_network.get_q_values(s[None],self.action_to_onehot(optimal_action)[None])) 393 | print 'ind', ind, self.q_network.get_q_values(s[None],self.action_to_onehot(ind)[None])[0] 394 | 395 | # plt.clf() 396 | # self.fig = plt.figure(figsize=(4,4)) 397 | # self.ims = [] 398 | title = self.axes.text(0.5,1.05,"Step {}".format(step), 399 | size=plt.rcParams["axes.titlesize"], 400 | ha="center", transform=self.axes.transAxes, ) 401 | print "ADD!" 
402 | self.ims.append([plt.pcolor(Q_matrix2,vmin=-10, vmax=10), title]) 403 | 404 | 405 | 406 | 407 | 408 | -------------------------------------------------------------------------------- /Others/agents/pos_cac_fo/replay_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | import logging 4 | import config 5 | from collections import deque 6 | import random 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | logger = logging.getLogger("Agent.replay") 11 | result = logging.getLogger('Result') 12 | 13 | 14 | class ReplayBuffer: 15 | def __init__(self): 16 | self.replay_memory_capacity = FLAGS.b_size # capacity of experience replay memory 17 | self.minibatch_size = FLAGS.m_size # size of minibatch from experience replay memory for updates 18 | self.replay_memory = deque(maxlen=self.replay_memory_capacity) 19 | 20 | def add_to_memory(self, experience): 21 | self.replay_memory.append(experience) 22 | 23 | def sample_from_memory(self): 24 | return random.sample(self.replay_memory, self.minibatch_size) 25 | 26 | def erase(self): 27 | self.replay_memory.popleft() 28 | -------------------------------------------------------------------------------- /Others/agents/pos_cac_fo/trainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | """ 5 | =========================================== 6 | :mod:`qlearn` Q-Learning 7 | =========================================== 8 | 9 | 10 | 설명 11 | ===== 12 | 13 | Choose action based on q-learning algorithm 14 | 15 | """ 16 | 17 | import numpy as np 18 | from agents.pos_cac_fo.agent import Agent 19 | from agents.simple_agent import RandomAgent as NonLearningAgent 20 | from agents.evaluation import Evaluation 21 | import logging 22 | import config 23 | 24 | FLAGS = config.flags.FLAGS 25 | logger = logging.getLogger("Agent") 26 | result = logging.getLogger('Result') 27 | 28 | training_step = FLAGS.training_step 29 | testing_step = FLAGS.testing_step 30 | 31 | if FLAGS.epsilon == "No": 32 | print "No epsilon decreasing" 33 | epsilon_dec = 0.0/training_step 34 | elif FLAGS.epsilon == "Yes": 35 | print "Epsilon decreasing" 36 | epsilon_dec = 2.0/training_step 37 | epsilon_min = 0.1 38 | 39 | 40 | class Trainer(object): 41 | 42 | def __init__(self, env): 43 | logger.info("Centralized DQN Trainer is created") 44 | 45 | self._env = env 46 | self._eval = Evaluation() 47 | self._n_predator = FLAGS.n_predator 48 | self._n_prey = FLAGS.n_prey 49 | self.action_dim = self._env.call_action_dim() 50 | self.state_dim = self._env.call_state_dim() 51 | 52 | self._agent = Agent(self.action_dim, self.state_dim) 53 | 54 | self.epsilon = 1.0 55 | 56 | def learn(self): 57 | 58 | step = 0 59 | episode = 0 60 | print_flag = False 61 | array = np.zeros([FLAGS.training_step/FLAGS.eval_step,4]) 62 | while step < training_step: 63 | episode += 1 64 | ep_step = 0 65 | obs = self._env.reset() 66 | state = obs 67 | total_reward = 0 68 | 69 | while True: 70 | step += 1 71 | ep_step += 1 72 | action = self.get_action(obs, step, state) 73 | obs_n, reward, done, info = self._env.step(action) 74 | state_n = obs_n 75 | 76 | done_single = sum(done) > 0 77 | if ep_step >= FLAGS.max_step : 78 | done_single = True 79 | self.train_agents(state, action, reward, state_n, done_single) 80 | 81 | obs = obs_n 82 | state = state_n 83 | total_reward += np.sum(reward) * (FLAGS.df ** (ep_step-1)) 84 | # if step % 100 ==0: 85 | # print step, self._agent.q() 86 
| if is_episode_done(done, step) or ep_step >= FLAGS.max_step : 87 | if print_flag: 88 | print "[train_ep %d]" % (episode),"\tstep:", step, "\tep_step:", ep_step, "\treward", total_reward 89 | break 90 | 91 | 92 | if episode % FLAGS.eval_step == 0: 93 | 94 | self.test(episode) 95 | 96 | 97 | self._eval.summarize() 98 | 99 | 100 | def get_action(self, obs, step, state, train=True): 101 | act_n = [] 102 | self.epsilon = max(self.epsilon - epsilon_dec, epsilon_min) 103 | 104 | # Action of predator 105 | action_list = self._agent.act(state) 106 | 107 | for i in range(self._n_predator): 108 | if train and (step < FLAGS.m_size * FLAGS.pre_train_step or np.random.rand() < self.epsilon): # with prob. epsilon 109 | action = np.random.choice(self.action_dim) 110 | act_n.append(action) 111 | else: 112 | act_n.append(action_list[i]) 113 | 114 | 115 | 116 | return np.array(act_n, dtype=np.int32) 117 | 118 | def train_agents(self, state, action, reward, state_n, done): 119 | self._agent.train(state, action, reward, state_n, done) 120 | 121 | def test(self, curr_ep=None): 122 | 123 | step = 0 124 | episode = 0 125 | 126 | test_flag = FLAGS.kt 127 | sum_reward = 0 128 | while step < testing_step: 129 | episode += 1 130 | obs = self._env.reset() 131 | state = obs 132 | if test_flag: 133 | print "\nInit\n", state 134 | total_reward = 0 135 | 136 | ep_step = 0 137 | 138 | while True: 139 | 140 | step += 1 141 | ep_step += 1 142 | 143 | action = self.get_action(obs, step, state, False) 144 | 145 | obs_n, reward, done, info = self._env.step(action) 146 | state_n = obs_n 147 | 148 | if test_flag: 149 | aa = raw_input('>') 150 | if aa == 'c': 151 | test_flag = False 152 | print action 153 | print state_n 154 | print reward 155 | 156 | obs = obs_n 157 | state = state_n 158 | total_reward += np.sum(reward) * (FLAGS.df ** (ep_step-1)) 159 | 160 | if is_episode_done(done, step, "test") or ep_step >= FLAGS.max_step: 161 | break 162 | sum_reward += total_reward 163 | 164 | print "Algorithm ", FLAGS.algorithm, ",Average reward: ", curr_ep, sum_reward /episode 165 | self._eval.update_value("test_result", sum_reward /episode, curr_ep) 166 | 167 | 168 | 169 | def is_episode_done(done, step, e_type="train"): 170 | 171 | if e_type == "test": 172 | if sum(done) > 0 or step >= FLAGS.testing_step: 173 | return True 174 | else: 175 | return False 176 | 177 | else: 178 | if sum(done) > 0 or step >= FLAGS.training_step: 179 | return True 180 | else: 181 | return False 182 | 183 | 184 | -------------------------------------------------------------------------------- /Others/agents/replay_buffer.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import random 3 | import config 4 | import numpy as np 5 | 6 | FLAGS = config.flags.FLAGS 7 | 8 | replay_memory_capacity = FLAGS.replay_buffer_capacity # capacity of experience replay memory 9 | minibatch_size = FLAGS.minibatch_size # size of minibatch from experience replay memory for updates 10 | trace_length = FLAGS.rnn_trace_len 11 | 12 | class ReplayBuffer: 13 | def __init__(self): 14 | self.replay_memory = deque(maxlen=replay_memory_capacity) 15 | 16 | def add_to_memory(self, experience): 17 | self.replay_memory.append(experience) 18 | 19 | def sample_from_memory(self): 20 | return random.sample(self.replay_memory, minibatch_size) 21 | 22 | class RNNReplayBuffer: 23 | def __init__(self): 24 | self.replay_memory = deque(maxlen=replay_memory_capacity) 25 | self.paddings = None 26 | 27 | def add_to_memory(self, 
experience): 28 | self.replay_memory.append(experience) 29 | 30 | if self.paddings == None: 31 | obs = np.zeros(experience[-1][0].shape) 32 | self.paddings = (obs, 0, 0, obs, True) 33 | 34 | def pad_trace(self, trace): 35 | trace.extend([self.paddings]*(trace_length-len(trace))) 36 | return trace 37 | 38 | def sample_from_memory(self): 39 | if len(self.replay_memory) < minibatch_size: 40 | n_points_per_ep = int(np.ceil(minibatch_size * 1./len(self.replay_memory))) 41 | sampled_episodes = self.replay_memory 42 | else: 43 | n_points_per_ep = 1 44 | sampled_episodes = random.sample(self.replay_memory, minibatch_size) 45 | 46 | sampledTraces = [] 47 | true_trace_length = np.ones(minibatch_size)*trace_length 48 | 49 | for i in range(n_points_per_ep): 50 | for j, episode in enumerate(sampled_episodes): 51 | if len(episode) < trace_length: 52 | true_trace_length[j] = len(episode) 53 | sampledTraces.append(self.pad_trace(episode)) # use the whole episode 54 | else: 55 | point = np.random.randint(0,len(episode) + 1 - trace_length) 56 | sampledTraces.append(episode[point:point + trace_length]) 57 | 58 | sampledTraces = np.array(sampledTraces[:minibatch_size]) # discard extra samples 59 | sampledTraces = np.reshape(sampledTraces,[minibatch_size*trace_length,-1]) 60 | return sampledTraces, true_trace_length 61 | -------------------------------------------------------------------------------- /Others/agents/simple_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RandomAgent(object): 4 | def __init__(self, action_dim): 5 | self._action_dim = action_dim 6 | 7 | def act(self, obs): 8 | return np.random.randint(self._action_dim) 9 | 10 | def train(self, minibatch, step): 11 | return 12 | 13 | class StaticAgent(object): 14 | def __init__(self, action): 15 | self._action = action 16 | 17 | def act(self, obs): 18 | return self._action 19 | 20 | def train(self, minibatch, step): 21 | return -------------------------------------------------------------------------------- /Others/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | import tensorflow as tf 5 | import logging 6 | import time 7 | import envs.config_env as config_env 8 | import agents.config_agents as config_agent 9 | 10 | flags = tf.flags 11 | 12 | flags.DEFINE_integer("seed", 0, "Random seed number") 13 | flags.DEFINE_string("folder", "default", "Result file folder name") 14 | 15 | config_env.config_env(flags) 16 | config_agent.config_agent(flags) 17 | 18 | # Make result file with given filename 19 | now = time.localtime() 20 | s_time = "%02d%02d%02d%02d%02d" % (now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec) 21 | file_name = str(flags.FLAGS.n_predator) + "-" 22 | file_name += config_env.get_filename() + "-" + config_agent.get_filename() 23 | file_name += "-seed-"+str(flags.FLAGS.seed)+"-" + s_time 24 | result = logging.getLogger('Result') 25 | result.setLevel(logging.INFO) 26 | 27 | if flags.FLAGS.folder == "default": 28 | result_fh = logging.FileHandler("./results/eval/r-" + file_name + ".txt") 29 | nn_filename = "./results/nn/n-" + file_name 30 | else: 31 | result_fh = logging.FileHandler("./results/eval/"+ flags.FLAGS.folder +"/r-" + file_name + ".txt") 32 | nn_filename = "./results/nn/" + flags.FLAGS.folder + "/n-" + file_name 33 | 34 | 35 | result_fm = logging.Formatter('[%(filename)s:%(lineno)s] %(asctime)s\t%(message)s') 36 | result_fh.setFormatter(result_fm) 
37 | result.addHandler(result_fh) 38 | 39 | # Used to map colors to integers 40 | COLOR_TO_IDX = { 41 | 'red' : 0, 42 | 'green' : 1, 43 | 'blue' : 2, 44 | 'purple': 3, 45 | 'yellow': 4, 46 | 'grey' : 5 47 | } 48 | 49 | IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys())) 50 | 51 | # Map of object type to integers 52 | OBJECT_TO_IDX = { 53 | 'empty' : 0, 54 | 'wall' : 1, 55 | 'agent' : 2, 56 | 'predator' : 3, 57 | 'prey' : 4 58 | } 59 | 60 | IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys())) -------------------------------------------------------------------------------- /Others/envs/#environment.py#: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | import numpy as np 4 | import config 5 | 6 | # environment for all agents in the multiagent world 7 | # currently code assumes that no agents will be created/destroyed at runtime! 8 | 9 | FLAGS = config.flags.FLAGS 10 | 11 | penalty = FLAGS.penalty 12 | agents = FLAGS.n_predator 13 | 14 | class MultiAgentEnv(gym.Env): 15 | metadata = { 16 | 'render.modes': ['human', 'rgb_array'] 17 | } 18 | 19 | def __init__(self, world, reset_callback=None, reward_callback=None, 20 | observation_callback=None, info_callback=None, 21 | done_callback=None, shared_viewer=True): 22 | 23 | self.world = world 24 | self.agents = self.world.agents 25 | # set required vectorized gym env property 26 | self.n = len(world.agents) 27 | # scenario callbacks 28 | self.reset_callback = reset_callback 29 | self.reward_callback = reward_callback 30 | self.observation_callback = observation_callback 31 | self.info_callback = info_callback 32 | self.done_callback = done_callback 33 | 34 | # environment parameters 35 | self.discrete_comm_space = True 36 | self.time = 0 37 | 38 | # configure spaces 39 | self.action_space = [] 40 | self.observation_space = [] 41 | self.agent_precedence = [] 42 | for agent in self.agents: 43 | self.agent_precedence.append(agent.itype) 44 | total_action_space = [] 45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 46 | total_action_space.append(u_action_space) 47 | 48 | # communication action space 49 | if self.discrete_comm_space: 50 | c_action_space = spaces.Discrete(world.dim_c) 51 | else: 52 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,)) 53 | 54 | if not agent.silent: 55 | total_action_space.append(c_action_space) 56 | 57 | # total action space 58 | if len(total_action_space) > 1: 59 | # all action spaces are discrete, so simplify to MultiDiscrete action space 60 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 61 | act_space = spaces.MultiDiscrete([act_space.n for act_space in total_action_space]) 62 | else: 63 | act_space = spaces.Tuple(total_action_space) 64 | self.action_space.append(act_space) 65 | else: 66 | self.action_space.append(total_action_space[0]) 67 | 68 | # observation space 69 | obs_dim = len(observation_callback(agent, self.world).flatten()) 70 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,))) 71 | agent.action.c = np.zeros(self.world.dim_c) 72 | 73 | def get_agent_profile(self): 74 | agent_profile = {} 75 | 76 | for i, agent in enumerate(self.agents): 77 | if agent.itype in agent_profile: 78 | agent_profile[agent.itype]['n_agent'] += 1 79 | agent_profile[agent.itype]['idx'].append(i) 80 | else: 81 | if isinstance(self.action_space[i], spaces.Discrete): 82 | act_space = self.action_space[i].n 83 | com_space = 
0 84 | else: 85 | act_space = self.action_space[i].nvec[0] 86 | com_space = self.action_space[i].nvec[1] 87 | 88 | agent_profile[agent.itype] = { 89 | 'n_agent': 1, 90 | 'idx': [i], 91 | 'act_dim': act_space, 92 | 'com_dim': com_space, 93 | 'obs_dim': self.observation_space[i].shape 94 | } 95 | 96 | return agent_profile 97 | 98 | def step(self, action_n): 99 | obs_n = [] 100 | reward_n = [] 101 | done_n = [] 102 | info_n = {'n': []} 103 | 104 | self.agents = self.world.agents 105 | self.world.step(action_n) 106 | 107 | for agent in self.agents: 108 | obs_n.append(self._get_obs(agent)) 109 | reward_n.append(self._get_reward(agent)) 110 | done_n.append(self._get_done(agent)) 111 | info_n['n'].append(self._get_info(agent)) 112 | 113 | return obs_n, reward_n, done_n, info_n 114 | 115 | def reset(self): 116 | # reset world 117 | self.reset_callback(self.world) 118 | 119 | obs_n = [] 120 | for agent in self.agents: 121 | obs_n.append(self._get_obs(agent)) 122 | return obs_n 123 | 124 | # get info used for benchmarking 125 | def _get_info(self, agent): 126 | if self.info_callback is None: 127 | return {} 128 | return self.info_callback(agent, self.world) 129 | 130 | # get observation for a particular agent 131 | def _get_obs(self, agent): 132 | if self.observation_callback is None: 133 | return np.zeros(0) 134 | return self.observation_callback(agent, self.world) 135 | 136 | # get dones for a particular agent 137 | # unused right now -- agents are allowed to go beyond the viewing screen 138 | def _get_done(self, agent): 139 | if self.done_callback is None: 140 | return False 141 | return self.done_callback(agent, self.world) 142 | 143 | # get reward for a particular agent 144 | def _get_reward(self, agent): 145 | if self.reward_callback is None: 146 | return 0.0 147 | return self.reward_callback(agent, self.world) 148 | 149 | def get_full_encoding(self): 150 | return self.world.get_full_encoding() 151 | 152 | class MultiAgentSimpleEnv1(gym.Env): 153 | def __init__(self, n_predator=1): 154 | 155 | # self.state = [0] 156 | self.action_dim = 2 157 | self.state_dim = 3 158 | 159 | self.state = np.array([1,0,0]) 160 | self.payoff_1 = np.array([[7.,7.],[7.,7.]]) 161 | # self.payoff_2 = np.array([[0.,1.],[1.,8.]]) 162 | self.payoff_2 = np.array([[0.,1.],[1.,8.]]) 163 | 164 | def reset(self): 165 | 166 | self.state = np.array([1,0,0]) 167 | 168 | return self.state 169 | 170 | def step(self, action): 171 | 172 | info = {'n': []} 173 | reward = [] 174 | done = [] 175 | 176 | if self.state[0] == 1: 177 | if action[0] == 0: 178 | self.state = np.array([0,1,0]) 179 | reward.append(0.) 180 | done.append(False) 181 | elif action[0] == 1: 182 | self.state = np.array([0,0,1]) 183 | reward.append(0.) 
184 | done.append(False) 185 | elif self.state[1] == 1: 186 | self.state = np.array([0,0,0]) 187 | reward.append(self.payoff_1[action[0],action[1]]) 188 | done.append(True) 189 | elif self.state[2] == 1: 190 | self.state = np.array([0,0,0]) 191 | reward.append(self.payoff_2[action[0],action[1]]) 192 | done.append(True) 193 | 194 | return self.state, reward, done, info 195 | 196 | def call_action_dim(self): 197 | return self.action_dim 198 | 199 | def call_state_dim(self): 200 | return self.state_dim 201 | 202 | 203 | class MultiAgentSimpleEnv2(gym.Env): 204 | def __init__(self, n_predator=1): 205 | 206 | self.state = [10] 207 | self.action_dim = 3 208 | self.state_dim = 1 209 | 210 | # self.payoff2 = np.array([[10.,3.,-10.],[2.,1.,-10.],[-10.,-10.,10.]]) 211 | # self.payoff2 = np.array([[10,-10,-10],[-10,1,2],[-10,3,5]]) 212 | # self.payoff2 = np.array([[10.,9.,5.],[9.,5.,1.],[5.,1.,0.]]) 213 | # self.payoff2 = np.array([[12.60697365, 7.6309042, 6.60896063], [ 7.62529612, 2.64922833, 1.62728453], [ 6.71724892, 1.74118018, 0.71923661]]) 214 | # self.payoff2 = np.array([[10,6,5],[6,4,2],[5,3,1]]) # Climbing game 215 | # self.payoff2 = np.array([[5,0,0],[6,7,-30],[0,-30,11]]) 216 | # self.payoff2 = np.array([[8,-penalty,-penalty],[-penalty,0,0],[-penalty,0,0]]) 217 | self.payoff2 = np.array([[10.0,0.,0.],[0.,2.,0.],[0.,0.,10.0]]) # Penalty game 218 | 219 | 220 | def reset(self): 221 | 222 | self.state = [10] 223 | 224 | return self.state 225 | 226 | def step(self, action): 227 | 228 | info = {'n': []} 229 | reward = [] 230 | done = [] 231 | reward.append(self.payoff2[action[0],action[1]]) 232 | self.state = [3] 233 | done.append(True) 234 | 235 | return self.state, reward, done, info 236 | 237 | def call_action_dim(self): 238 | return self.action_dim 239 | 240 | def call_state_dim(self): 241 | return self.state_dim 242 | 243 | class MultiAgentSimpleEnv3(gym.Env): 244 | def __init__(self, n_predator=1): 245 | 246 | self.state = [0] 247 | self.action_dim = 2 248 | self.state_dim = 1 249 | 250 | # self.payoff2 = np.array([[10.,3.,-10.],[2.,1.,-10.],[-10.,-10.,10.]]) 251 | # self.payoff2 = np.array([[10,-10,-10],[-10,1,2],[-10,3,5]]) 252 | # self.payoff2 = np.array([[10.,9.,8.],[9.,2.,1.],[8.,1.,0.]]) 253 | # self.payoff2 = np.array([[12.60697365, 7.6309042, 6.60896063], [ 7.62529612, 2.64922833, 1.62728453], [ 6.71724892, 1.74118018, 0.71923661]]) 254 | # self.payoff2 = np.array([[11,-30,0],[-30,7,6],[0,0,5]]) # Climbing game 255 | # self.payoff2 = np.array([[5,0,0],[6,7,-30],[0,-30,11]]) 256 | # self.payoff2 = np.array([[10.0,0.,-10.],[0.,2.,0.],[-10.,0.,10.]]) # Penalty game 257 | 258 | # self.payoff1 = np.array([[20,-10,0],[-10,2,6],[0,8,-10]]) 259 | # self.payoff2 = np.array([[-10,8,0],[6,2,-10],[0,-10,10]]) 260 | 261 | self.payoff1 = np.array([[6,7],[8,9]]) 262 | self.payoff2 = np.array([[0,1 - penalty],[1 - penalty,10]]) 263 | 264 | 265 | # if np.random.randint(2) == 0: 266 | # self.state = [1] 267 | # else: 268 | # self.state = [2] 269 | def reset(self): 270 | 271 | # if np.random.randint(2) == 0: 272 | # self.state = [1] 273 | # else: 274 | # self.state = [2] 275 | self.state = [0] 276 | 277 | return self.state 278 | 279 | def step(self, action): 280 | 281 | info = {'n': []} 282 | reward = [] 283 | done = [] 284 | if self.state[0] == 0: 285 | reward.append(0) 286 | if action[0] == 0: 287 | self.state = [1] 288 | else: 289 | self.state = [2] 290 | done.append(False) 291 | elif self.state[0] == 1: 292 | reward.append(self.payoff1[action[0],action[1]]) 293 | self.state = [3] 294 | 
done.append(True) 295 | else: 296 | reward.append(self.payoff2[action[0],action[1]]) 297 | self.state = [3] 298 | done.append(True) 299 | # self.state = [3] 300 | # done.append(True) 301 | # print self.state[0], action[0], action[1], reward 302 | return self.state, reward, done, info 303 | 304 | def call_action_dim(self): 305 | return self.action_dim 306 | 307 | def call_state_dim(self): 308 | return self.state_dim 309 | 310 | 311 | class MultiAgentSimpleEnv4(gym.Env): 312 | def __init__(self, n_predator=1): 313 | 314 | self.state = np.random.uniform(0,2,agents) 315 | # self.state = np.ones(agents) 316 | # self.state = self.state / np.sum(self.state) * 10 317 | # self.state = np.ones(agents) * 0.5 318 | self.action_dim = 11 319 | self.state_dim = agents 320 | 321 | 322 | 323 | def reset(self): 324 | 325 | self.state = np.random.uniform(0.0,1.0,agents) 326 | # self.state = np.ones(agents) 327 | # self.state = self.state / np.sum(self.state) * 10 328 | # self.state = self.state 329 | # self.state = np.ones(agents) * 0.5 330 | 331 | return self.state 332 | 333 | def step(self, action): 334 | 335 | info = {'n': []} 336 | reward = [] 337 | done = [] 338 | # if np.sum(np.array(action)) == 0: 339 | # reward.append(0) 340 | # if np.sum(np.array(action)) == 1: 341 | # np.sum(np.array(action) * self.state) 342 | # else: 343 | # r = (-1) * np.sum(np.array(action) * self.state) * penalty * 0.1 344 | # reward.append(r) 345 | r = np.sum(np.array(action) * self.state) 346 | # print np.array(action) 347 | # print self.state 348 | # print 'reward:', r 349 | th = 20.0 350 | th2 = 40.0 351 | th3 = 50.0 352 | # if r < -th: 353 | # reward.append(abs(r)) 354 | 355 | # if r < th2: 356 | # reward.append( -np.square(r-th) / 40. + 10) 357 | # # elif r < th2: 358 | # # reward.append( (2*th - r) / 2 ) 359 | 360 | # else: 361 | # reward.append( -np.square(r-th3) / 5. 
+ 20) 362 | # 10*e^(-(x-30)^2/100) + 20 *e^(-(x-90)^2/400) 363 | rv = 10 * np.exp( -np.square(r-25) / 50) + 15 \ 364 | * np.exp( -np.square(r-40)/10) 365 | # print r 366 | # print rv 367 | # 10*e^(-(x-35)^2/100) + 15 *e^(-(x-80)^2/100) 368 | reward.append(rv) 369 | 370 | # total_reward = 0 371 | # th = 1.0 372 | # for i in range(self.action_dim-1): 373 | # idx = np.where(np.array(action) == i+1, 1.0, 0.0) 374 | # r = np.sum(idx * self.state) 375 | # if r > th: 376 | # total_reward += th - (r-th) * penalty 377 | # else: 378 | # total_reward += r 379 | # reward.append(total_reward) 380 | 381 | # r = abs(np.count_nonzero(action) - (agents - np.count_nonzero(action))) 382 | # if r == 0: 383 | # reward.append(10) 384 | # else: 385 | # reward.append( (r-10) * penalty) 386 | # self.state = np.random.uniform(0,1,agents) 387 | # self.state = np.ones(10) * 0.5 388 | done.append(True) 389 | 390 | return self.state, reward, done, info 391 | 392 | def call_action_dim(self): 393 | return self.action_dim 394 | 395 | def call_state_dim(self): 396 | return self.state_dim 397 | -------------------------------------------------------------------------------- /Others/envs/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | # from gym.envs.registration import register 3 | 4 | # Multiagent envs 5 | # ---------------------------------------- 6 | 7 | # register( 8 | # id='MultiagentSimple-v0', 9 | # entry_point='multiagent.envs:SimpleEnv', 10 | # # FIXME(cathywu) currently has to be exactly max_path_length parameters in 11 | # # rllab run script 12 | # max_episode_steps=100, 13 | # ) 14 | 15 | # register( 16 | # id='MultiagentSimpleSpeakerListener-v0', 17 | # entry_point='multiagent.envs:SimpleSpeakerListenerEnv', 18 | # max_episode_steps=100, 19 | # ) 20 | 21 | logger_agent = logging.getLogger('GridMARL') 22 | logger_agent.setLevel(logging.INFO) 23 | 24 | fm = logging.Formatter('[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > [%(name)s] %(message)s') 25 | sh = logging.StreamHandler() 26 | sh.setFormatter(fm) 27 | logger_agent.addHandler(sh) 28 | 29 | # fh_agent = logging.FileHandler('./agent.log') 30 | # fh_agent.setFormatter(fm) 31 | # logger_agent.addHandler(fh_agent) 32 | -------------------------------------------------------------------------------- /Others/envs/config_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | def config_env(_flags): 5 | flags = _flags 6 | 7 | # Scenario 8 | flags.DEFINE_string("scenario", "pursuit", "Scenario") 9 | flags.DEFINE_integer("n_predator", 2, "Number of predators") 10 | flags.DEFINE_integer("n_prey", 1, "Number of preys") 11 | 12 | # Observation 13 | flags.DEFINE_integer("history_len", 1, "How many previous steps we look back") 14 | 15 | # core 16 | flags.DEFINE_integer("map_size", 3, "Size of the map") 17 | flags.DEFINE_float("render_every", 1000, "Render the nth episode") 18 | flags.DEFINE_integer("penalty", 10, "penalty") 19 | 20 | def get_filename(): 21 | import config 22 | FLAGS = config.flags.FLAGS 23 | 24 | return "s-"+FLAGS.scenario+"-map-"+str(FLAGS.map_size) +"-penalty-"+str(FLAGS.penalty) -------------------------------------------------------------------------------- /Others/envs/environment.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym 
import spaces 3 | import numpy as np 4 | import config 5 | 6 | # environment for all agents in the multiagent world 7 | # currently code assumes that no agents will be created/destroyed at runtime! 8 | 9 | FLAGS = config.flags.FLAGS 10 | 11 | penalty = FLAGS.penalty 12 | agents = FLAGS.n_predator 13 | 14 | class MultiAgentEnv(gym.Env): 15 | metadata = { 16 | 'render.modes': ['human', 'rgb_array'] 17 | } 18 | 19 | def __init__(self, world, reset_callback=None, reward_callback=None, 20 | observation_callback=None, info_callback=None, 21 | done_callback=None, shared_viewer=True): 22 | 23 | self.world = world 24 | self.agents = self.world.agents 25 | # set required vectorized gym env property 26 | self.n = len(world.agents) 27 | # scenario callbacks 28 | self.reset_callback = reset_callback 29 | self.reward_callback = reward_callback 30 | self.observation_callback = observation_callback 31 | self.info_callback = info_callback 32 | self.done_callback = done_callback 33 | 34 | # environment parameters 35 | self.discrete_comm_space = True 36 | self.time = 0 37 | 38 | # configure spaces 39 | self.action_space = [] 40 | self.observation_space = [] 41 | self.agent_precedence = [] 42 | for agent in self.agents: 43 | self.agent_precedence.append(agent.itype) 44 | total_action_space = [] 45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 46 | total_action_space.append(u_action_space) 47 | 48 | # communication action space 49 | if self.discrete_comm_space: 50 | c_action_space = spaces.Discrete(world.dim_c) 51 | else: 52 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,)) 53 | 54 | if not agent.silent: 55 | total_action_space.append(c_action_space) 56 | 57 | # total action space 58 | if len(total_action_space) > 1: 59 | # all action spaces are discrete, so simplify to MultiDiscrete action space 60 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 61 | act_space = spaces.MultiDiscrete([act_space.n for act_space in total_action_space]) 62 | else: 63 | act_space = spaces.Tuple(total_action_space) 64 | self.action_space.append(act_space) 65 | else: 66 | self.action_space.append(total_action_space[0]) 67 | 68 | # observation space 69 | obs_dim = len(observation_callback(agent, self.world).flatten()) 70 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,))) 71 | agent.action.c = np.zeros(self.world.dim_c) 72 | 73 | def get_agent_profile(self): 74 | agent_profile = {} 75 | 76 | for i, agent in enumerate(self.agents): 77 | if agent.itype in agent_profile: 78 | agent_profile[agent.itype]['n_agent'] += 1 79 | agent_profile[agent.itype]['idx'].append(i) 80 | else: 81 | if isinstance(self.action_space[i], spaces.Discrete): 82 | act_space = self.action_space[i].n 83 | com_space = 0 84 | else: 85 | act_space = self.action_space[i].nvec[0] 86 | com_space = self.action_space[i].nvec[1] 87 | 88 | agent_profile[agent.itype] = { 89 | 'n_agent': 1, 90 | 'idx': [i], 91 | 'act_dim': act_space, 92 | 'com_dim': com_space, 93 | 'obs_dim': self.observation_space[i].shape 94 | } 95 | 96 | return agent_profile 97 | 98 | def step(self, action_n): 99 | obs_n = [] 100 | reward_n = [] 101 | done_n = [] 102 | info_n = {'n': []} 103 | 104 | self.agents = self.world.agents 105 | self.world.step(action_n) 106 | 107 | for agent in self.agents: 108 | obs_n.append(self._get_obs(agent)) 109 | reward_n.append(self._get_reward(agent)) 110 | done_n.append(self._get_done(agent)) 111 | info_n['n'].append(self._get_info(agent)) 112 | 113 | return 
obs_n, reward_n, done_n, info_n 114 | 115 | def reset(self): 116 | # reset world 117 | self.reset_callback(self.world) 118 | 119 | obs_n = [] 120 | for agent in self.agents: 121 | obs_n.append(self._get_obs(agent)) 122 | return obs_n 123 | 124 | # get info used for benchmarking 125 | def _get_info(self, agent): 126 | if self.info_callback is None: 127 | return {} 128 | return self.info_callback(agent, self.world) 129 | 130 | # get observation for a particular agent 131 | def _get_obs(self, agent): 132 | if self.observation_callback is None: 133 | return np.zeros(0) 134 | return self.observation_callback(agent, self.world) 135 | 136 | # get dones for a particular agent 137 | # unused right now -- agents are allowed to go beyond the viewing screen 138 | def _get_done(self, agent): 139 | if self.done_callback is None: 140 | return False 141 | return self.done_callback(agent, self.world) 142 | 143 | # get reward for a particular agent 144 | def _get_reward(self, agent): 145 | if self.reward_callback is None: 146 | return 0.0 147 | return self.reward_callback(agent, self.world) 148 | 149 | def get_full_encoding(self): 150 | return self.world.get_full_encoding() 151 | 152 | class MultiAgentSimpleEnv1(gym.Env): 153 | def __init__(self, n_predator=1): 154 | 155 | # self.state = [0] 156 | self.action_dim = 2 157 | self.state_dim = 3 158 | 159 | self.state = np.array([1,0,0]) 160 | self.payoff_1 = np.array([[7.,7.],[7.,7.]]) 161 | # self.payoff_2 = np.array([[0.,1.],[1.,8.]]) 162 | self.payoff_2 = np.array([[0.,1.],[1.,8.]]) 163 | 164 | def reset(self): 165 | 166 | self.state = np.array([1,0,0]) 167 | 168 | return self.state 169 | 170 | def step(self, action): 171 | 172 | info = {'n': []} 173 | reward = [] 174 | done = [] 175 | 176 | if self.state[0] == 1: 177 | if action[0] == 0: 178 | self.state = np.array([0,1,0]) 179 | reward.append(0.) 180 | done.append(False) 181 | elif action[0] == 1: 182 | self.state = np.array([0,0,1]) 183 | reward.append(0.) 
184 | done.append(False) 185 | elif self.state[1] == 1: 186 | self.state = np.array([0,0,0]) 187 | reward.append(self.payoff_1[action[0],action[1]]) 188 | done.append(True) 189 | elif self.state[2] == 1: 190 | self.state = np.array([0,0,0]) 191 | reward.append(self.payoff_2[action[0],action[1]]) 192 | done.append(True) 193 | 194 | return self.state, reward, done, info 195 | 196 | def call_action_dim(self): 197 | return self.action_dim 198 | 199 | def call_state_dim(self): 200 | return self.state_dim 201 | 202 | 203 | class MultiAgentSimpleEnv2(gym.Env): #Matrix game 204 | def __init__(self, n_predator=1): 205 | 206 | self.state = [1] 207 | self.action_dim = 3 208 | self.state_dim = 1 209 | 210 | self.payoff2 = np.array([[8.,-12.,-12.],[-12.,0.,0.],[-12.,0.,0.]]) 211 | 212 | 213 | 214 | def reset(self): 215 | 216 | self.state = [1] 217 | 218 | return self.state 219 | 220 | def step(self, action): 221 | 222 | info = {'n': []} 223 | reward = [] 224 | done = [] 225 | reward.append(self.payoff2[action[0],action[1]]) 226 | self.state = [3] 227 | done.append(True) 228 | 229 | return self.state, reward, done, info 230 | 231 | def call_action_dim(self): 232 | return self.action_dim 233 | 234 | def call_state_dim(self): 235 | return self.state_dim 236 | 237 | class MultiAgentSimpleEnv3(gym.Env): #Partial observation 238 | def __init__(self, n_predator=1): 239 | 240 | self.state = [0] 241 | self.action_dim = 3 242 | self.state_dim = 1 243 | 244 | 245 | 246 | self.payoff1 = np.array([[10,8,5],[8,6,3],[5,3,0]]) 247 | self.payoff2 = np.array([[0,3,5],[3,6,8],[5,8,10]]) 248 | 249 | # self.payoff1 = np.array([[6,7],[8,9]]) 250 | # self.payoff2 = np.array([[0,1 - penalty],[1 - penalty,10]]) 251 | 252 | 253 | if np.random.randint(2) == 0: 254 | self.state = [1] 255 | else: 256 | self.state = [2] 257 | def reset(self): 258 | 259 | if np.random.randint(2) == 0: 260 | self.state = [1] 261 | else: 262 | self.state = [2] 263 | # self.state = [0] 264 | 265 | return self.state 266 | 267 | def step(self, action): 268 | 269 | info = {'n': []} 270 | reward = [] 271 | done = [] 272 | 273 | if self.state[0] == 1: 274 | reward.append(self.payoff1[action[0],action[1]]) 275 | self.state = [3] 276 | done.append(True) 277 | else: 278 | reward.append(self.payoff2[action[0],action[1]]) 279 | self.state = [3] 280 | done.append(True) 281 | # self.state = [3] 282 | # done.append(True) 283 | # print self.state[0], action[0], action[1], reward 284 | return self.state, reward, done, info 285 | 286 | def call_action_dim(self): 287 | return self.action_dim 288 | 289 | def call_state_dim(self): 290 | return self.state_dim 291 | 292 | 293 | class MultiAgentSimpleEnv4(gym.Env): #Gaussian Squeeze 294 | def __init__(self, n_predator=1): 295 | 296 | self.state = np.random.uniform(0.,2.,agents) 297 | 298 | self.action_dim = 11 299 | 300 | self.state_dim = agents 301 | 302 | 303 | 304 | def reset(self): 305 | 306 | self.state = np.random.uniform(0.,2.,agents) 307 | 308 | 309 | return self.state 310 | 311 | def step(self, action): 312 | 313 | info = {'n': []} 314 | reward = [] 315 | done = [] 316 | 317 | r = np.sum(np.array(action) * self.state)/agents 318 | 319 | 320 | if penalty == 1: 321 | rv = r * np.exp( -np.square(r-5) / 1) + r * np.exp( -np.square(r-8) / 0.25) 322 | else: 323 | rv = r * np.exp( -np.square(r-8) / 0.25) 324 | 325 | 326 | reward.append(rv) 327 | 328 | done.append(True) 329 | 330 | return self.state, reward, done, info 331 | 332 | def call_action_dim(self): 333 | return self.action_dim 334 | 335 | def call_state_dim(self): 
336 | return self.state_dim 337 | -------------------------------------------------------------------------------- /Others/envs/grid_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import config 3 | 4 | 5 | COLOR_TO_IDX = config.COLOR_TO_IDX 6 | OBJECT_TO_IDX = config.OBJECT_TO_IDX 7 | 8 | N = 0 9 | E = 1 10 | O = 2 11 | W = 3 12 | S = 4 13 | 14 | # action of the agent 15 | class Action(object): 16 | def __init__(self): 17 | # physical action 18 | self.u = None 19 | # communication action 20 | self.c = None 21 | 22 | # properties and state of physical world entity 23 | class Entity(object): 24 | def __init__(self, itype, color): 25 | assert itype in OBJECT_TO_IDX, itype 26 | assert color in COLOR_TO_IDX, color 27 | self.itype = itype 28 | self.color = color 29 | self.contains = None 30 | 31 | # name 32 | self.name = '' 33 | # properties: 34 | self.movable = False 35 | # entity collides with others 36 | self.collide = True 37 | # material density (affects mass) 38 | self.density = 25.0 39 | 40 | @property 41 | def pos(self): 42 | return self._x, self._y 43 | 44 | def set_pos(self, x, y): 45 | self._x = x 46 | self._y = y 47 | 48 | # properties of agent entities 49 | class CoreAgent(Entity): 50 | def __init__(self, itype='agent', color='green'): 51 | super(CoreAgent, self).__init__(itype, color) 52 | self.name = "" 53 | # agents are movable by default 54 | self.movable = True 55 | # cannot send communication signals 56 | self.silent = True 57 | # action 58 | self.action = Action() 59 | # if waiting for other agents action 60 | self.waiting = False 61 | # if done doing its action in the current step 62 | self.done_moving = False 63 | # if the intended step collided 64 | self.collided = False 65 | 66 | self._obs = None 67 | self._x = 0 68 | self._y = 0 69 | self.obs_range = 1 70 | 71 | def update_obs(self, obs): 72 | self._obs = obs 73 | 74 | def get_obs(self): 75 | return self._obs 76 | 77 | class Wall(Entity): 78 | def __init__(self, color='grey'): 79 | super(Wall, self).__init__('wall', color) 80 | 81 | class Grid(object): 82 | """ 83 | Represent a grid and operations on it 84 | """ 85 | 86 | def __init__(self, width, height): 87 | assert width >= 2 88 | assert height >= 2 89 | 90 | self.width = width 91 | self.height = height 92 | self.reset() 93 | 94 | def set(self, i, j, v): 95 | assert i >= 0 and i < self.width 96 | assert j >= 0 and j < self.height 97 | self.grid[j * self.width + i] = v 98 | 99 | def get(self, i, j): 100 | if ((i >= 0 and i < self.width) and \ 101 | (j >= 0 and j < self.height)): 102 | return self.grid[j * self.width + i] 103 | 104 | return Wall() 105 | 106 | def reset(self): 107 | self.grid = [None] * self.width * self.height 108 | 109 | def setHorzWall(self, x, y, length=None): 110 | if length is None: 111 | length = self.width - x 112 | for i in range(0, length): 113 | self.set(x + i, y, Wall()) 114 | 115 | def setVertWall(self, x, y, length=None): 116 | if length is None: 117 | length = self.height - y 118 | for j in range(0, length): 119 | self.set(x, y + j, Wall()) 120 | 121 | def wallRect(self, x, y, w, h): 122 | self.setHorzWall(x, y, w) 123 | self.setHorzWall(x, y+h-1, w) 124 | self.setVertWall(x, y, h) 125 | self.setVertWall(x+w-1, y, h) 126 | 127 | def slice(self, topX, topY, width, height): 128 | """ 129 | Get a subset of the grid 130 | """ 131 | 132 | grid = Grid(width, height) 133 | 134 | for j in range(0, height): 135 | for i in range(0, width): 136 | x = topX + i 137 | y = topY + j 138 
| 139 | if x >= 0 and x < self.width and \ 140 | y >= 0 and y < self.height: 141 | v = self.get(x, y) 142 | else: 143 | v = Wall() 144 | 145 | grid.set(i, j, v) 146 | 147 | return grid 148 | 149 | def encode(self): 150 | """ 151 | Produce a compact numpy encoding of the grid 152 | """ 153 | 154 | array = np.zeros(shape=(self.height, self.width, 3), dtype=np.int8) 155 | 156 | for j in range(0, self.height): 157 | for i in range(0, self.width): 158 | 159 | v = self.get(i, j) 160 | if isinstance(v, CoreAgent): 161 | array[j, i, 2] = v.id 162 | 163 | if v == None: 164 | continue 165 | 166 | 167 | array[j, i, 0] = OBJECT_TO_IDX[v.itype] 168 | array[j, i, 1] = COLOR_TO_IDX[v.color] 169 | 170 | return array 171 | 172 | # multi-agent world 173 | class World(object): 174 | def __init__(self, width, height): 175 | # list of agents and entities (can change at execution-time!) 176 | self.agents = [] 177 | 178 | # communication channel dimensionality 179 | self.dim_c = 0 180 | # position dimensionality 181 | self.dim_p = 2 182 | 183 | self.width = width 184 | self.height = height 185 | 186 | self.grid = Grid(self.width, self.height) 187 | self.grid.wallRect(0, 0, self.width, self.height) 188 | 189 | self.step_cnt = 0 190 | 191 | def empty_grid(self): 192 | self.step_cnt = 0 193 | self.grid.reset() 194 | 195 | def placeObj(self, obj, top=None, size=None, reject_fn=None): 196 | """ 197 | Place an object at an empty position in the grid 198 | 199 | :param top: top-left position of the rectangle where to place 200 | :param size: size of the rectangle where to randomly place 201 | :param reject_fn: function to filter out potential positions 202 | """ 203 | 204 | if top is None: 205 | top = (0, 0) 206 | 207 | if size is None: 208 | size = (self.grid.width, self.grid.height) 209 | 210 | while True: 211 | pos = ( 212 | np.random.randint(top[0], top[0] + size[0]), 213 | np.random.randint(top[1], top[1] + size[1]) 214 | ) 215 | 216 | # Don't place the object on top of another object 217 | if self.grid.get(*pos) != None: 218 | continue 219 | 220 | # Check if there is a filtering criterion 221 | if reject_fn and reject_fn(self, pos): 222 | continue 223 | 224 | break 225 | 226 | self.grid.set(pos[0], pos[1], obj) 227 | obj.set_pos(pos[0], pos[1]) 228 | return pos 229 | 230 | def resetObj(self, obj, top=None, size=None, reject_fn=None): 231 | """ 232 | Reset an object at an empty position in the grid 233 | 234 | :param top: top-left position of the rectangle where to place 235 | :param size: size of the rectangle where to randomly place 236 | :param reject_fn: function to filter out potential positions 237 | """ 238 | if top is None: 239 | top = (0, 0) 240 | 241 | if size is None: 242 | size = (self.grid.width, self.grid.height) 243 | 244 | while True: 245 | pos = ( 246 | np.random.randint(top[0], top[0] + size[0]), 247 | np.random.randint(top[1], top[1] + size[1]) 248 | ) 249 | 250 | # Don't place the object on top of another object 251 | if self.grid.get(*pos) != None: 252 | continue 253 | 254 | # Check if there is a filtering criterion 255 | if reject_fn and reject_fn(self, pos): 256 | continue 257 | 258 | break 259 | x, y = obj.pos 260 | self.grid.set(x, y, None) 261 | self.grid.set(pos[0], pos[1], obj) 262 | obj.set_pos(pos[0], pos[1]) 263 | return pos 264 | 265 | def single_agent_step(self, agent, action): 266 | if agent.done_moving or agent.waiting: 267 | return 268 | 269 | x, y = agent.pos 270 | action = agent.action.u 271 | 272 | if action == N: 273 | y -= 1 274 | elif action == E: 275 | x -= 1 276 | elif 
action == W: 277 | x += 1 278 | elif action == S: 279 | y += 1 280 | elif action == O: 281 | agent.done_moving = True 282 | agent.collided = False 283 | return 284 | 285 | intended_cell = self.grid.get(x, y) 286 | if isinstance(intended_cell, CoreAgent): 287 | agent.waiting = True 288 | # let the other agent move first 289 | self.single_agent_step(intended_cell, intended_cell.action.u) 290 | agent.waiting = False 291 | # get the intended cell (to check if it is empty) 292 | intended_cell = self.grid.get(x, y) 293 | 294 | # check if the intended cell is empty 295 | if not intended_cell is None: 296 | agent.collided = True 297 | else: 298 | x_0, y_0 = agent.pos 299 | self.grid.set(x_0, y_0, None) 300 | self.grid.set(x, y, agent) 301 | agent.set_pos(x, y) 302 | agent.collided = False 303 | 304 | agent.done_moving = True 305 | 306 | # update state of the world 307 | def step(self, action_n): 308 | self.step_cnt += 1 309 | # set the action 310 | for i, agent in enumerate(self.agents): 311 | agent.action.u = action_n[i] 312 | agent.done_moving = False 313 | 314 | # do the action 315 | for agent in self.agents: 316 | self.single_agent_step(agent, agent.action.u) 317 | 318 | # update observations of all agents 319 | self.set_observations() 320 | 321 | def set_observations(self): 322 | for agent in self.agents: 323 | x, y = agent.pos 324 | r = agent.obs_range 325 | obs = self.grid.slice(x-r, y-r,r*2+1,r*2+1) 326 | agent.update_obs(obs.encode()) 327 | 328 | def get_full_encoding(self): 329 | return self.grid.encode() -------------------------------------------------------------------------------- /Others/envs/scenario.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # defines scenario upon which the world is built 4 | class BaseScenario(object): 5 | # create elements of the world 6 | def make_world(self): 7 | raise NotImplementedError() 8 | # create initial conditions of the world 9 | def reset_world(self, world): 10 | raise NotImplementedError() 11 | -------------------------------------------------------------------------------- /Others/envs/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | 4 | 5 | def load(name): 6 | pathname = osp.join(osp.dirname(__file__), name) 7 | return imp.load_source('', pathname) 8 | -------------------------------------------------------------------------------- /Others/envs/scenarios/endless.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[0,1,0], 19 | [1,0,1], 20 | [0,1,0]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | def can_observe_predator(self): 27 | shape = np.shape(self._obs[:,:,0]) 28 | obs_size = shape[0]*shape[1] 29 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 30 | ret = np.shape(np.where(obs == 3))[1] > 0 31 | return ret 32 | 33 | def 
can_observe_two_predator(self): 34 | shape = np.shape(self._obs[:,:,0]) 35 | obs_size = shape[0]*shape[1] 36 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 37 | ret = np.shape(np.where(obs == 3))[1] > 1 38 | return ret 39 | 40 | class Predator(Agent): 41 | def __init__(self): 42 | super(Predator, self).__init__("predator", "blue") 43 | self._obs = deque(maxlen=FLAGS.history_len) 44 | self.obs_range = 1 45 | 46 | def can_observe_prey(self): 47 | shape = np.shape(self._obs) 48 | obs_size = shape[1]*shape[2] 49 | obs = np.reshape(self._obs, obs_size) 50 | ret = np.shape(np.where(obs == 4))[1] > 0 51 | return ret 52 | 53 | def update_obs(self, obs): 54 | self._obs.append(obs[:,:,0]) # use only the first channel 55 | 56 | def fill_obs(self): 57 | # fill the whole history with the current observation 58 | for i in range(FLAGS.history_len-1): 59 | self._obs.append(self._obs[-1]) 60 | 61 | class Scenario(BaseScenario): 62 | def __init__(self): 63 | self.prey_captured = False 64 | 65 | def make_world(self): 66 | world = World(width=map_size, height=map_size) 67 | 68 | agents = [] 69 | self.atype_to_idx = { 70 | "predator": [], 71 | "prey": [] 72 | } 73 | 74 | # add predators 75 | for i in xrange(n_predator): 76 | agents.append(Predator()) 77 | self.atype_to_idx["predator"].append(i) 78 | 79 | # add preys 80 | for i in xrange(n_prey): 81 | agents.append(Prey()) 82 | self.atype_to_idx["prey"].append(n_predator + i) 83 | 84 | world.agents = agents 85 | for i, agent in enumerate(world.agents): 86 | agent.id = i + 1 87 | agent.silent = True 88 | 89 | # make initial conditions 90 | self.reset_world(world) 91 | return world 92 | 93 | def reset_world(self, world): 94 | world.empty_grid() 95 | 96 | # randomly place agent 97 | for agent in world.agents: 98 | world.placeObj(agent) 99 | 100 | world.set_observations() 101 | 102 | # fill the history with current observation 103 | for i in self.atype_to_idx["predator"]: 104 | world.agents[i].fill_obs() 105 | 106 | self.prey_captured = False 107 | 108 | def reward(self, agent, world): 109 | if agent.itype == "predator": 110 | # if self.prey_captured: 111 | # # return max(10 - world.step_cnt, 0) 112 | # return 1 113 | # else: 114 | # reward = -0.01 115 | # for i in self.atype_to_idx["prey"]: 116 | # prey = world.agents[i] 117 | # if prey.cannot_move(): 118 | # reward = 1 119 | # world.resetObj(prey) 120 | # return reward 121 | # # kdw - Use this for large map size 122 | # # if agent.can_observe_prey(): 123 | # # reward = 0.0 124 | # return reward 125 | reward = -0.001 126 | 127 | for i in self.atype_to_idx["prey"]: 128 | prey = world.agents[i] 129 | if prey.can_observe_predator(): 130 | #world.resetObj(prey) 131 | reward += 0.1 132 | return reward 133 | 134 | else: # if prey 135 | if agent.cannot_move(): 136 | return -1 137 | 138 | return 0 139 | 140 | def observation(self, agent, world): 141 | # print agent.get_obs.shape 142 | obs = np.array(agent.get_obs()).flatten() 143 | return obs 144 | 145 | def done(self, agent, world): 146 | if agent.itype == "prey": 147 | if agent.can_observe_predator(): 148 | world.resetObj(agent) 149 | return False 150 | #return self.prey_captured -------------------------------------------------------------------------------- /Others/envs/scenarios/pursuit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import 
BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[0,1,0], 19 | [1,0,1], 20 | [0,1,0]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | class Predator(Agent): 27 | def __init__(self): 28 | super(Predator, self).__init__("predator", "blue") 29 | self._obs = deque(maxlen=FLAGS.history_len) 30 | self.obs_range = 1 31 | 32 | def can_observe_prey(self): 33 | shape = np.shape(self._obs) 34 | obs_size = shape[1]*shape[2] 35 | obs = np.reshape(self._obs, obs_size) 36 | ret = np.shape(np.where(obs == 4))[1] > 0 37 | return ret 38 | 39 | def update_obs(self, obs): 40 | self._obs.append(obs[:,:,0]) # use only the first channel 41 | 42 | def fill_obs(self): 43 | # fill the whole history with the current observation 44 | for i in range(FLAGS.history_len-1): 45 | self._obs.append(self._obs[-1]) 46 | 47 | class Scenario(BaseScenario): 48 | def __init__(self): 49 | self.prey_captured = False 50 | 51 | def make_world(self): 52 | world = World(width=map_size, height=map_size) 53 | 54 | agents = [] 55 | self.atype_to_idx = { 56 | "predator": [], 57 | "prey": [] 58 | } 59 | 60 | # add predators 61 | for i in xrange(n_predator): 62 | agents.append(Predator()) 63 | self.atype_to_idx["predator"].append(i) 64 | 65 | # add preys 66 | for i in xrange(n_prey): 67 | agents.append(Prey()) 68 | self.atype_to_idx["prey"].append(n_predator + i) 69 | 70 | world.agents = agents 71 | for i, agent in enumerate(world.agents): 72 | agent.id = i + 1 73 | agent.silent = True 74 | 75 | # make initial conditions 76 | self.reset_world(world) 77 | return world 78 | 79 | def reset_world(self, world): 80 | world.empty_grid() 81 | 82 | # randomly place agent 83 | for agent in world.agents: 84 | world.placeObj(agent) 85 | 86 | world.set_observations() 87 | 88 | # fill the history with current observation 89 | for i in self.atype_to_idx["predator"]: 90 | world.agents[i].fill_obs() 91 | 92 | self.prey_captured = False 93 | 94 | def reward(self, agent, world): 95 | if agent.itype == "predator": 96 | if self.prey_captured: 97 | # return max(10 - world.step_cnt, 0) 98 | return 1 99 | else: 100 | reward = -0.01 101 | for i in self.atype_to_idx["prey"]: 102 | prey = world.agents[i] 103 | if prey.cannot_move(): 104 | # print "captured" 105 | self.prey_captured = True 106 | reward = 1 107 | return reward 108 | # kdw - Use this for large map size 109 | # if agent.can_observe_prey(): 110 | # reward = 0.0 111 | return reward 112 | else: # if prey 113 | if agent.cannot_move(): 114 | return -1 115 | 116 | return 0 117 | 118 | def observation(self, agent, world): 119 | # print agent.get_obs.shape 120 | obs = np.array(agent.get_obs()).flatten() 121 | return obs 122 | 123 | def done(self, agent, world): 124 | return self.prey_captured -------------------------------------------------------------------------------- /Others/envs/scenarios/single_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from envs.scenarios.pursuit import Scenario as BaseScenario 3 | import config 4 | 5 | FLAGS = config.flags.FLAGS 6 | map_size = FLAGS.map_size 7 | 8 | class Scenario(BaseScenario): 9 | def __init__(self): 10 | super(Scenario, self).__init__() 11 
| print "Single agent scenario" 12 | 13 | def reset_world(self, world): 14 | world.empty_grid() 15 | 16 | prey_pos = [0, 0] 17 | 18 | prey_idx = self.atype_to_idx["prey"][0] 19 | world.placeObj(world.agents[prey_idx], top=prey_pos, size=(1,1)) 20 | 21 | top = ((prey_pos[0]+1)%map_size, (prey_pos[1]+1)%map_size) 22 | 23 | world.placeObj(world.agents[0], top=top, size=(2, 2)) 24 | world.placeObj(world.agents[1], top=[0, 1], size=(1, 1)) 25 | 26 | world.set_observations() 27 | 28 | # fill the history with current observation 29 | for i in self.atype_to_idx["predator"]: 30 | world.agents[i].fill_obs() 31 | 32 | self.prey_captured = False 33 | -------------------------------------------------------------------------------- /Others/envs/scenarios/static_prey.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from envs.scenarios.pursuit import Scenario as BaseScenario 3 | import config 4 | 5 | FLAGS = config.flags.FLAGS 6 | map_size = FLAGS.map_size 7 | 8 | class Scenario(BaseScenario): 9 | def __init__(self): 10 | super(Scenario, self).__init__() 11 | 12 | def reset_world(self, world): 13 | world.empty_grid() 14 | 15 | # prey_pos = np.random.choice([map_size - 1, 0], 2) 16 | prey_pos = [0, 0] 17 | prey_idx = self.atype_to_idx["prey"][0] 18 | world.placeObj(world.agents[prey_idx], top=prey_pos, size=(1,1)) 19 | 20 | top = ((prey_pos[0]+1)%map_size, (prey_pos[1]+1)%map_size) 21 | for idx in self.atype_to_idx["predator"]: 22 | world.placeObj(world.agents[idx], top=top, size=(2,2)) 23 | 24 | world.set_observations() 25 | 26 | # fill the history with current observation 27 | for i in self.atype_to_idx["predator"]: 28 | world.agents[i].fill_obs() 29 | 30 | self.prey_captured = False 31 | -------------------------------------------------------------------------------- /Others/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | import logging 4 | import make_env 5 | import agents 6 | import config 7 | import time 8 | import random 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | FLAGS = config.flags.FLAGS 13 | 14 | def set_seed(seed): 15 | """Initialized the random seeds 16 | """ 17 | random.seed(seed) 18 | np.random.seed(seed) 19 | tf.set_random_seed(seed) 20 | return None 21 | 22 | 23 | if __name__ == '__main__': 24 | 25 | set_seed(FLAGS.seed) 26 | 27 | # === Logging setup === # 28 | logger_env = logging.getLogger('SimpleMARL') 29 | logger_agent = logging.getLogger('Agent') 30 | 31 | # === Program start === # 32 | # Load environment 33 | env = make_env.make_env(FLAGS.scenario) 34 | logger_env.info('SimpleMARL Start with %d predator(s) and %d prey(s)', FLAGS.n_predator, FLAGS.n_prey) 35 | 36 | # Load trainer 37 | logger_agent.info('Agent: {}'.format(FLAGS.agent)) 38 | trainer = agents.load(FLAGS.agent+"/trainer.py").Trainer(env) 39 | 40 | # print FLAGS.agent, config.file_name 41 | 42 | # start learning 43 | if FLAGS.train: 44 | start_time = time.time() 45 | trainer.learn() 46 | finish_time = time.time() 47 | # trainer.test() 48 | # print "TRAINING TIME (sec)", finish_time - start_time 49 | else: 50 | trainer.test() 51 | -------------------------------------------------------------------------------- /Others/make_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for creating a multiagent environment with one of the scenarios listed 3 | in ./scenarios/. 
4 | Can be called by using, for example: 5 | env = make_env('simple_speaker_listener') 6 | After producing the env object, can be used similarly to an OpenAI gym 7 | environment. 8 | 9 | A policy using this environment must output actions in the form of a list 10 | for all agents. Each element of the list should be a numpy array, 11 | of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede 12 | communication actions in this array. See environment.py for more details. 13 | """ 14 | 15 | def make_env(scenario_name, benchmark=False): 16 | ''' 17 | Creates a MultiAgentEnv object as env. This can be used similar to a gym 18 | environment by calling env.reset() and env.step(). 19 | Use env.render() to view the environment on the screen. 20 | 21 | Input: 22 | scenario_name : name of the scenario from ./scenarios/ to be Returns 23 | (without the .py extension) 24 | benchmark : whether you want to produce benchmarking data 25 | (usually only done during evaluation) 26 | 27 | Some useful env properties (see environment.py): 28 | .observation_space : Returns the observation space for each agent 29 | .action_space : Returns the action space for each agent 30 | .n : Returns the number of Agents 31 | ''' 32 | # from envs.environment import MultiAgentEnv 33 | import envs.scenarios as scenarios 34 | from envs.environment import MultiAgentSimpleEnv2 as MAS 35 | import config 36 | 37 | env = MAS() 38 | 39 | return env 40 | -------------------------------------------------------------------------------- /Others/readme: -------------------------------------------------------------------------------- 1 | Training 2 | 3 | $algorithm = vdn, qmix, pqmix5(=QTRAN-alt in the paper), pqmix7(=QTRAN in the paper) 4 | 5 | python main.py --agent pos_cac_fo --training_step 10000 --b_size 10000 --m_size 32 --seed 0 --algorithm $algorithm --penalty 0 6 | 7 | 8 | In make_env.py 9 | 10 | (i) Matrix game 11 | 12 | from envs.environment import MultiAgentSimpleEnv2 as MAS 13 | 14 | (i) Gaussian Squeeze 15 | 16 | from envs.environment import MultiAgentSimpleEnv4 as MAS 17 | 18 | 19 | -------------------------------------------------------------------------------- /Others/run_DQN2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | #for seed in 1 2 3 4 5 4 | #do 5 | CUDA_VISIBLE_DEVICES=$1 python main.py --agent pos_cac_fo --training_step 8000 --b_size 10000 --m_size 64 --seed 6 --algorithm $2 --penalty $3 & 6 | #done 7 | 8 | -------------------------------------------------------------------------------- /Others/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os,sys 3 | sys.path.insert(1, os.path.join(sys.path[0], '..')) 4 | import argparse 5 | 6 | from envs.environment import MultiAgentEnv 7 | import envs.scenarios as scenarios 8 | import numpy as np 9 | import config 10 | 11 | FLAGS = config.flags.FLAGS 12 | 13 | 14 | if __name__ == '__main__': 15 | # parse arguments 16 | parser = argparse.ArgumentParser(description=None) 17 | parser.add_argument('-s', '--scenario', default='pursuit.py', help='Path of the scenario Python script.') 18 | args = parser.parse_args() 19 | 20 | # load scenario from script 21 | scenario = scenarios.load(args.scenario).Scenario() 22 | # create world 23 | world = scenario.make_world() 24 | # create multiagent environment 25 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, done_callback=scenario.done) 26 | act_n = 
[2, 2] 27 | print "action space:", env.action_space[0].n 28 | print "observation space:", env.observation_space 29 | 30 | obs_n = env.reset()[:2] 31 | print env.get_agent_profile() 32 | print env.get_full_encoding()[:, :, 2] 33 | imap = np.array(obs_n).reshape((2, FLAGS.history_len,3,3,1)) 34 | 35 | minimap = imap[:,:,:,:,0] 36 | print minimap[0, -1] 37 | print minimap[1, -1] 38 | 39 | while True: 40 | a0 = input("action of agent 0:") 41 | a1 = input("action of agent 1:") 42 | act_n = [a0, a1, 2] 43 | obs_n, reward_n, done_n, info_n = env.step(act_n) 44 | obs_n = obs_n[:2] 45 | 46 | 47 | print env.get_full_encoding()[:,:,2] 48 | imap = np.array(obs_n).reshape((2, FLAGS.history_len,3,3,1)) 49 | 50 | minimap = imap[:,:,:,:,0] 51 | print minimap[0, -1] 52 | print minimap[1, -1] 53 | 54 | 55 | print reward_n, done_n 56 | 57 | -------------------------------------------------------------------------------- /Predator-Prey/agents/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | import logging 4 | 5 | 6 | def load(name): 7 | pathname = osp.join(osp.dirname(__file__), name) 8 | return imp.load_source('', pathname) 9 | 10 | 11 | logger_agent = logging.getLogger('Agent') 12 | logger_agent.setLevel(logging.INFO) 13 | fh_agent = logging.FileHandler('./agent.log') 14 | sh = logging.StreamHandler() 15 | fm = logging.Formatter('[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > [%(name)s] %(message)s') 16 | fh_agent.setFormatter(fm) 17 | sh.setFormatter(fm) 18 | logger_agent.addHandler(fh_agent) 19 | logger_agent.addHandler(sh) 20 | -------------------------------------------------------------------------------- /Predator-Prey/agents/config_agents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | # import agents 4 | 5 | 6 | def config_agent(_flags): 7 | flags = _flags 8 | 9 | flags.DEFINE_string("agent", "cac_fo", "Agent") 10 | 11 | flags.DEFINE_integer("training_step", 500000, "Training time step") 12 | flags.DEFINE_integer("testing_step", 1000, "Testing time step") 13 | flags.DEFINE_integer("max_step", 200, "Maximum time step per episode") 14 | flags.DEFINE_integer("eval_step", 1000, "Number of steps before training") 15 | # flags.DEFINE_integer("training_step", 5000, "Training time step") 16 | # flags.DEFINE_integer("testing_step", 1000, "Testing time step") 17 | # flags.DEFINE_integer("max_step", 200, "Maximum time step per episode") 18 | # flags.DEFINE_integer("eval_step", 1000, "Number of steps before training") 19 | 20 | flags.DEFINE_integer("b_size", 10000, "Size of the replay memory") 21 | flags.DEFINE_integer("m_size", 32, "Minibatch size") 22 | flags.DEFINE_integer("pre_train_step", 300, "during [m_size * pre_step] take random action") 23 | flags.DEFINE_float("lr", 0.00025, "Learning rate") 24 | # flags.DEFINE_float("lr", 0.01, "Learning rate") # it is for single 25 | flags.DEFINE_float("df", 0.99, "Discount factor") 26 | 27 | flags.DEFINE_boolean("load_nn", False, "Load nn from file or not") 28 | flags.DEFINE_string("nn_file", "results/nn/n-2-s-endless3-map-5-penalty-10-a-pqmix5-lr-0.0005-ms-32-seed-28-0103231136-215-3000000", "The name of file for loading") 29 | 30 | flags.DEFINE_boolean("train", True, "Training or testing") 31 | flags.DEFINE_boolean("qtrace", False, "Use q trace") 32 | flags.DEFINE_boolean("kt", False, "Keyboard input test") 33 | flags.DEFINE_boolean("use_action_in_critic", False, "Use guided samples") 
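    # The flags above are read throughout the code via config.flags.FLAGS
    # (e.g. FLAGS.algorithm, FLAGS.lr, FLAGS.m_size) and can be overridden on the
    # command line, as in the run_DQN*.sh scripts. An illustrative invocation:
    #   python main.py --agent pos_cac_fo --algorithm qmix --m_size 32 --seed 0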
34 | flags.DEFINE_string("algorithm", "ddd", 35 | "Which agent to run, as a python path to an Agent class.") 36 | 37 | 38 | 39 | 40 | 41 | def get_filename(): 42 | import config 43 | FLAGS = config.flags.FLAGS 44 | 45 | return "a-"+FLAGS.algorithm+"-lr-"+str(FLAGS.lr)+"-ms-"+str(FLAGS.m_size) 46 | -------------------------------------------------------------------------------- /Predator-Prey/agents/evaluation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | import numpy as np 5 | import logging 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | result = logging.getLogger('Result') 10 | 11 | class Evaluation(object): 12 | 13 | def __init__(self): 14 | self.episode_cnt = 0 15 | self.m = dict() 16 | 17 | def update_value(self, m_key, m_value, m_append=None): 18 | if m_key in self.m: 19 | self.m[m_key]['value'] += m_value 20 | self.m[m_key]['cnt'] += 1 21 | else: 22 | self.m[m_key] = dict() 23 | self.m[m_key]['value'] = m_value 24 | self.m[m_key]['cnt'] = 1 25 | if m_append is None: 26 | result.info(m_key + "\t" + str(m_value)) 27 | else: 28 | result.info(m_key + "\t" + str(m_value) + "\t" + str(m_append)) 29 | 30 | def summarize(self, key=None): 31 | if key is None: 32 | for k in self.m: 33 | print "Average", k, float(self.m[k]['value'])/self.m[k]['cnt'] 34 | result.info("summary\t" + k + "\t" + str(float(self.m[k]['value']) / self.m[k]['cnt'])) 35 | 36 | elif key not in self.m: 37 | print "Wrong key" 38 | 39 | else: 40 | print "Average", key, float(self.m[key]['value']) / self.m[key]['cnt'] 41 | result.info("summary\t" + key + "\t" + str(float(self.m[key]['value'])/self.m[key]['cnt'])) 42 | -------------------------------------------------------------------------------- /Predator-Prey/agents/non_simple_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RandomAgent(object): 4 | def __init__(self, action_dim): 5 | self._action_dim = action_dim 6 | 7 | def act(self, obs): 8 | 9 | if np.random.rand() < 3./8. 
: 10 | return 2 11 | else: 12 | return np.random.randint(self._action_dim) 13 | 14 | # return 2 15 | 16 | def train(self, minibatch, step): 17 | return 18 | 19 | class StaticAgent(object): 20 | def __init__(self, action): 21 | self._action = action 22 | 23 | def act(self, obs): 24 | return self._action 25 | 26 | def train(self, minibatch, step): 27 | return 28 | -------------------------------------------------------------------------------- /Predator-Prey/agents/pos_cac_fo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sonkyunghwan/QTRAN/785c44ebc8379896dc9f513af2ac767d61013914/Predator-Prey/agents/pos_cac_fo/__init__.py -------------------------------------------------------------------------------- /Predator-Prey/agents/pos_cac_fo/agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | """ 5 | =========================================== 6 | :mod:`qlearn` Q-Learning 7 | =========================================== 8 | 9 | ===== 10 | 11 | Choose action based on q-learning algorithm 12 | """ 13 | 14 | import numpy as np 15 | import tensorflow as tf 16 | import math 17 | from agents.pos_cac_fo.dq_network import * 18 | from agents.pos_cac_fo.replay_buffer import * 19 | from agents.evaluation import Evaluation 20 | 21 | import logging 22 | import config 23 | 24 | FLAGS = config.flags.FLAGS 25 | logger = logging.getLogger("Agent") 26 | result = logging.getLogger('Result') 27 | 28 | 29 | class Agent(object): 30 | 31 | def __init__(self, action_dim, obs_dim, name=""): 32 | logger.info("Centralized DQN Agent") 33 | 34 | self._n_predator = FLAGS.n_predator 35 | self._n_prey = FLAGS.n_prey 36 | self.map_size = FLAGS.map_size 37 | 38 | self._obs_dim = obs_dim 39 | 40 | self._action_dim = action_dim * self._n_predator 41 | self._action_dim_single = action_dim 42 | self._n_object = (self._n_predator + self._n_prey) 43 | self._state_dim = 2 * (self._n_predator + self._n_prey) 44 | self._state_dim_single = (self.map_size**2) 45 | 46 | self._name = name 47 | self.update_cnt = 0 48 | self.target_update_period = 10000 49 | 50 | self.df = FLAGS.df 51 | self.lr = FLAGS.lr 52 | 53 | # Make Q-network 54 | tf.reset_default_graph() 55 | my_graph = tf.Graph() 56 | 57 | with my_graph.as_default(): 58 | self.sess = tf.Session(graph=my_graph, config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) 59 | self.q_network = DQNetwork(self.sess, self._state_dim, self._action_dim_single, self._n_predator) 60 | self.sess.run(tf.global_variables_initializer()) 61 | self.saver = tf.train.Saver() 62 | if FLAGS.load_nn: 63 | print "LOAD!" 
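                # Restore a previously saved Q-network checkpoint. The path below is
                # rebuilt from the current flag values (n_predator, map_size, penalty,
                # algorithm, seed, comment), with lr-0.0005 and ms-32 hard-coded in the
                # filename, so it only resolves if a run with matching settings was
                # saved earlier under ./results/nn3/.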
64 | self.saver.restore(self.sess, "./results/nn3/n-"+str(FLAGS.n_predator)+"-s-endless3-map-"+str(FLAGS.map_size)+"-penalty-"+str(FLAGS.penalty)+"-a-"+str(FLAGS.algorithm)+"-lr-0.0005-ms-32-seed-"+str(FLAGS.seed)+"-"+str(FLAGS.comment)) 65 | self.train_writer = tf.summary.FileWriter(config.tb_filename, self.sess.graph) 66 | 67 | self.replay_buffer = ReplayBuffer() 68 | 69 | self._eval = Evaluation() 70 | self.q_prev = None 71 | self.s_array = np.random.randint(self.map_size, size = (2 * (FLAGS.n_prey + FLAGS.n_predator), 100)) 72 | 73 | def act(self, state): 74 | 75 | predator_rand = np.random.permutation(FLAGS.n_predator) 76 | prey_rand = np.random.permutation(FLAGS.n_prey) 77 | 78 | s = self.state_to_index(state) 79 | 80 | action = self.q_network.get_action(s[None])[0] 81 | 82 | return action 83 | 84 | 85 | 86 | def train(self, state, action, reward, state_n, done): 87 | 88 | 89 | predator_rand = np.random.permutation(FLAGS.n_predator) 90 | prey_rand = np.random.permutation(FLAGS.n_prey) 91 | 92 | a = self.action_to_onehot(action) 93 | s = self.state_to_index(state) 94 | s_n = self.state_to_index(state_n) 95 | r = np.sum(reward) 96 | 97 | self.store_sample(s, a, r, s_n, done) 98 | 99 | self.update_network() 100 | return 0 101 | 102 | def store_sample(self, s, a, r, s_n, done): 103 | self.replay_buffer.add_to_memory((s, a, r, s_n, done)) 104 | return 0 105 | 106 | def update_network(self): 107 | self.update_cnt += 1 108 | if len(self.replay_buffer.replay_memory) < FLAGS.pre_train_step*minibatch_size: 109 | return 0 110 | 111 | minibatch = self.replay_buffer.sample_from_memory() 112 | self.q_network.training_qnet(minibatch) 113 | 114 | 115 | if self.update_cnt % self.target_update_period == 0: 116 | self.q_network.training_target_qnet() 117 | 118 | if self.update_cnt % 10000 == 0: 119 | self.saver.save(self.sess, config.nn_filename, self.update_cnt) 120 | 121 | return 0 122 | 123 | def state_to_index(self, state): 124 | """ 125 | For the single agent case, the state is only related to the position of agent 1 126 | :param state: 127 | :return: 128 | """ 129 | 130 | ret = np.zeros(self._state_dim) 131 | for i in range(FLAGS.n_predator + FLAGS.n_prey): 132 | p = np.argwhere(np.array(state)==i+1)[0] 133 | 134 | ret[2 * i] = (p[0] - FLAGS.map_size /2.) / FLAGS.map_size 135 | ret[2 * i + 1] = (p[1] - FLAGS.map_size /2.) 
/ FLAGS.map_size 136 | 137 | 138 | return ret 139 | 140 | def get_predator_pos(self, state): 141 | """ 142 | return position of agent 1 and 2 143 | :param state: input is state 144 | :return: 145 | """ 146 | state_list = list(np.array(state).ravel()) 147 | return state_list.index(1), state_list.index(2) 148 | 149 | def get_pos_by_id(self, state, id): 150 | state_list = list(np.array(state).ravel()) 151 | return state_list.index(id) 152 | 153 | def onehot(self, index, size): 154 | n_hot = np.zeros(size) 155 | n_hot[index] = 1.0 156 | return n_hot 157 | 158 | def index_to_action(self, index): 159 | action_list = [] 160 | for i in range(FLAGS.n_predator-1): 161 | action_list.append(index%5) 162 | index = index/5 163 | action_list.append(index) 164 | return action_list 165 | 166 | def action_to_index(self, action): 167 | index = 0 168 | for i in range(FLAGS.n_predator): 169 | index += action[i] * 5 ** i 170 | return index 171 | 172 | def action_to_onehot(self, action): 173 | onehot = np.zeros([self._n_predator, self._action_dim_single]) 174 | for i in range(self._n_predator): 175 | onehot[i, action[i]] = 1 176 | return onehot 177 | 178 | def onehot_to_action(self, onehot): 179 | action = np.zeros([self._n_predator]) 180 | for i in range(self._n_predator): 181 | action[i] = int(np.argmax(onehot[i])) 182 | return action 183 | 184 | def q_diff(self): 185 | 186 | # if self.q_prev == None: 187 | # self.q_prev = self.q() 188 | # return 189 | 190 | # q_next = self.q() 191 | 192 | # d = 0.0 193 | # a = 0.0 194 | # for i in range(100): 195 | # d += math.fabs(self.q_prev[i] - q_next[i]) 196 | # a += q_next[i] 197 | # avg = a/100 198 | 199 | # self._eval.update_value("q_avg", avg, self.update_cnt) 200 | # self._eval.update_value("q_diff", d, self.update_cnt) 201 | 202 | # self.q_prev = q_next 203 | 204 | # print self.update_cnt, d, avg 205 | 206 | print self.update_cnt 207 | 208 | def q(self): 209 | q_value = [] 210 | # for i in range(100): 211 | # s = self.s_array[:,i] 212 | # s = (s - FLAGS.map_size /2.) 
/ FLAGS.map_size 213 | # q = self.q_network.get_target_q_values(s[None])[0] 214 | # q_max = np.max(q) 215 | # q_value.append(q_max) 216 | return q_value 217 | 218 | def logging(self, reward, step): 219 | 220 | summary = self.q_network.summary(reward, step) 221 | 222 | self.train_writer.add_summary(summary, step) -------------------------------------------------------------------------------- /Predator-Prey/agents/pos_cac_fo/replay_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | import logging 4 | import config 5 | from collections import deque 6 | import random 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | logger = logging.getLogger("Agent.replay") 11 | result = logging.getLogger('Result') 12 | 13 | 14 | class ReplayBuffer: 15 | def __init__(self): 16 | self.replay_memory_capacity = FLAGS.b_size # capacity of experience replay memory 17 | self.minibatch_size = FLAGS.m_size # size of minibatch from experience replay memory for updates 18 | self.replay_memory = deque(maxlen=self.replay_memory_capacity) 19 | 20 | def add_to_memory(self, experience): 21 | self.replay_memory.append(experience) 22 | 23 | def sample_from_memory(self): 24 | return random.sample(self.replay_memory, self.minibatch_size) 25 | 26 | def erase(self): 27 | self.replay_memory.popleft() 28 | -------------------------------------------------------------------------------- /Predator-Prey/agents/pos_cac_fo/trainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | """ 5 | =========================================== 6 | :mod:`qlearn` Q-Learning 7 | =========================================== 8 | 9 | 10 | 설명 11 | ===== 12 | 13 | Choose action based on q-learning algorithm 14 | """ 15 | 16 | import numpy as np 17 | import tensorflow as tf 18 | from agents.pos_cac_fo.agent import Agent 19 | from agents.simple_agent import RandomAgent as NonLearningAgent 20 | from agents.evaluation import Evaluation 21 | from agents.simple_agent import StaticAgent as StAgent 22 | from agents.simple_agent import ActiveAgent as AcAgent 23 | import logging 24 | import config 25 | from envs.gui import canvas 26 | 27 | FLAGS = config.flags.FLAGS 28 | logger = logging.getLogger("Agent") 29 | result = logging.getLogger('Result') 30 | 31 | training_step = FLAGS.training_step 32 | testing_step = FLAGS.testing_step 33 | 34 | epsilon_dec = 2.0/training_step 35 | epsilon_min = 0.1 36 | 37 | 38 | class Trainer(object): 39 | 40 | def __init__(self, env): 41 | logger.info("Centralized DQN Trainer is created") 42 | 43 | self._env = env 44 | self._eval = Evaluation() 45 | self._n_predator = FLAGS.n_predator 46 | self._n_prey = FLAGS.n_prey 47 | self._agent_profile = self._env.get_agent_profile() 48 | self._agent_precedence = self._env.agent_precedence 49 | 50 | self._agent = Agent(self._agent_profile["predator"]["act_dim"], self._agent_profile["predator"]["obs_dim"][0]) 51 | self._prey_agent = AcAgent(5) 52 | 53 | self.epsilon = 1.0 54 | if FLAGS.load_nn: 55 | self.epsilon = epsilon_min 56 | 57 | if FLAGS.gui: 58 | self.canvas = canvas.Canvas(self._n_predator, self._n_prey, FLAGS.map_size) 59 | self.canvas.setup() 60 | def learn(self): 61 | 62 | step = 0 63 | episode = 0 64 | print_flag = False 65 | count = 1 66 | 67 | while step < training_step: 68 | episode += 1 69 | ep_step = 0 70 | obs = self._env.reset() 71 | state = self._env.get_full_encoding()[:, :, 2] 72 | total_reward = 0 73 | 
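            # Positive and negative reward components are accumulated separately
            # below so the per-episode print and the test summary can report both.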
total_reward_pos = 0 74 | total_reward_neg = 0 75 | self.random_action_generator() 76 | while True: 77 | step += 1 78 | ep_step += 1 79 | action = self.get_action(obs, step, state) 80 | obs_n, reward, done, info = self._env.step(action) 81 | state_n = self._env.get_full_encoding()[:, :, 2] 82 | done_single = sum(done) > 0 83 | 84 | self.train_agents(state, action, reward, state_n, done_single) 85 | obs = obs_n 86 | state = state_n 87 | total_reward += np.sum(reward) 88 | if np.sum(reward) >= 0: 89 | total_reward_pos += np.sum(reward) 90 | else: 91 | total_reward_neg += np.sum(reward) 92 | 93 | if is_episode_done(done, step) or ep_step >= FLAGS.max_step : 94 | # print step, ep_step, total_reward 95 | if print_flag and episode % FLAGS.eval_step == 1: 96 | print "[train_ep %d]" % (episode), "\treward", total_reward_pos, total_reward_neg 97 | break 98 | 99 | if episode % FLAGS.eval_step == 0: 100 | self.test(episode) 101 | 102 | self._eval.summarize() 103 | 104 | def random_action_generator(self): 105 | rand_unit = np.random.uniform(size = (FLAGS.n_predator, 5)) 106 | self.rand = rand_unit / np.sum(rand_unit, axis=1, keepdims=True) 107 | 108 | 109 | def get_action(self, obs, step, state, train=True): 110 | act_n = [] 111 | if train == True: 112 | self.epsilon = max(self.epsilon - epsilon_dec, epsilon_min) 113 | 114 | # Action of predator 115 | 116 | action_list = self._agent.act(state) 117 | for i in range(self._n_predator): 118 | if train and (step < FLAGS.m_size * FLAGS.pre_train_step or np.random.rand() < self.epsilon): 119 | action = np.random.choice(5) 120 | act_n.append(action) 121 | else: 122 | act_n.append(action_list[i]) 123 | 124 | 125 | 126 | # Action of prey 127 | for i in range(FLAGS.n_prey): 128 | act_n.append(self._prey_agent.act(state, i)) 129 | # act_n[1] = 2 130 | 131 | return np.array(act_n, dtype=np.int32) 132 | 133 | def train_agents(self, state, action, reward, state_n, done): 134 | self._agent.train(state, action, reward, state_n, done) 135 | 136 | def test(self, curr_ep=None): 137 | 138 | step = 0 139 | episode = 0 140 | 141 | test_flag = FLAGS.kt 142 | sum_reward = 0 143 | sum_reward_pos = 0 144 | sum_reward_neg = 0 145 | while step < testing_step: 146 | episode += 1 147 | obs = self._env.reset() 148 | state = self._env.get_full_encoding()[:, :, 2] 149 | if test_flag: 150 | print "\nInit\n", state 151 | total_reward = 0 152 | total_reward_pos = 0 153 | total_reward_neg = 0 154 | 155 | ep_step = 0 156 | 157 | while True: 158 | 159 | step += 1 160 | ep_step += 1 161 | 162 | action = self.get_action(obs, step, state, False) 163 | obs_n, reward, done, info = self._env.step(action) 164 | state_n = self._env.get_full_encoding()[:, :, 2] 165 | state_next = state_to_index(state_n) 166 | if FLAGS.gui: 167 | self.canvas.draw(state_next, done, "Score:" + str(total_reward) + ", Step:" + str(ep_step)) 168 | 169 | if test_flag: 170 | aa = raw_input('>') 171 | if aa == 'c': 172 | test_flag = False 173 | print action 174 | print state_n 175 | print reward 176 | 177 | obs = obs_n 178 | state = state_n 179 | r = np.sum(reward) 180 | # if r == 0.1: 181 | # r = r * (-1.) 
* FLAGS.penalty 182 | total_reward += r # * (FLAGS.df ** (ep_step-1)) 183 | if r > 0: 184 | total_reward_pos += r 185 | else: 186 | total_reward_neg -= r 187 | 188 | 189 | if is_episode_done(done, step, "test") or ep_step >= FLAGS.max_step: 190 | 191 | if FLAGS.gui: 192 | self.canvas.draw(state_next, done, "Hello", "Score:" + str(total_reward) + ", Step:" + str(ep_step)) 193 | 194 | break 195 | sum_reward += total_reward 196 | sum_reward_pos += total_reward_pos 197 | sum_reward_neg += total_reward_neg 198 | if FLAGS.scenario =="pursuit": 199 | print "Test result: Average steps to capture: ", curr_ep, float(step)/episode 200 | self._eval.update_value("training result: ", float(step)/episode, curr_ep) 201 | elif FLAGS.scenario =="endless" or FLAGS.scenario =="endless2" or FLAGS.scenario =="endless3": 202 | print "Average reward:", FLAGS.penalty, curr_ep, sum_reward /episode, sum_reward_pos/episode, sum_reward_neg/episode 203 | self._eval.update_value("training result: ", sum_reward/episode, curr_ep) 204 | self._agent.logging(sum_reward/episode, curr_ep * 100) 205 | 206 | 207 | def is_episode_done(done, step, e_type="train"): 208 | 209 | if e_type == "test": 210 | if sum(done) > 0 or step >= FLAGS.testing_step: 211 | return True 212 | else: 213 | return False 214 | 215 | else: 216 | if sum(done) > 0 or step >= FLAGS.training_step: 217 | return True 218 | else: 219 | return False 220 | 221 | def state_to_index(state): 222 | """ 223 | For the single agent case, the state is only related to the position of agent 1 224 | :param state: 225 | :return: 226 | """ 227 | 228 | ret = np.zeros(2 * (FLAGS.n_predator + FLAGS.n_prey)) 229 | for i in range(FLAGS.n_predator + FLAGS.n_prey): 230 | p = np.argwhere(np.array(state)==i+1)[0] 231 | #p = self.get_pos_by_id(state, i+1) 232 | ret[2 * i] = p[0] 233 | ret[2 * i + 1] = p[1] 234 | 235 | 236 | return ret 237 | 238 | 239 | 240 | 241 | -------------------------------------------------------------------------------- /Predator-Prey/agents/replay_buffer.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import random 3 | import config 4 | import numpy as np 5 | 6 | FLAGS = config.flags.FLAGS 7 | 8 | replay_memory_capacity = FLAGS.replay_buffer_capacity # capacity of experience replay memory 9 | minibatch_size = FLAGS.minibatch_size # size of minibatch from experience replay memory for updates 10 | trace_length = FLAGS.rnn_trace_len 11 | 12 | class ReplayBuffer: 13 | def __init__(self): 14 | self.replay_memory = deque(maxlen=replay_memory_capacity) 15 | 16 | def add_to_memory(self, experience): 17 | self.replay_memory.append(experience) 18 | 19 | def sample_from_memory(self): 20 | return random.sample(self.replay_memory, minibatch_size) 21 | 22 | class RNNReplayBuffer: 23 | def __init__(self): 24 | self.replay_memory = deque(maxlen=replay_memory_capacity) 25 | self.paddings = None 26 | 27 | def add_to_memory(self, experience): 28 | self.replay_memory.append(experience) 29 | 30 | if self.paddings == None: 31 | obs = np.zeros(experience[-1][0].shape) 32 | self.paddings = (obs, 0, 0, obs, True) 33 | 34 | def pad_trace(self, trace): 35 | trace.extend([self.paddings]*(trace_length-len(trace))) 36 | return trace 37 | 38 | def sample_from_memory(self): 39 | if len(self.replay_memory) < minibatch_size: 40 | n_points_per_ep = int(np.ceil(minibatch_size * 1./len(self.replay_memory))) 41 | sampled_episodes = self.replay_memory 42 | else: 43 | n_points_per_ep = 1 44 | sampled_episodes = 
random.sample(self.replay_memory, minibatch_size) 45 | 46 | sampledTraces = [] 47 | true_trace_length = np.ones(minibatch_size)*trace_length 48 | 49 | for i in range(n_points_per_ep): 50 | for j, episode in enumerate(sampled_episodes): 51 | if len(episode) < trace_length: 52 | true_trace_length[j] = len(episode) 53 | sampledTraces.append(self.pad_trace(episode)) # use the whole episode 54 | else: 55 | point = np.random.randint(0,len(episode) + 1 - trace_length) 56 | sampledTraces.append(episode[point:point + trace_length]) 57 | 58 | sampledTraces = np.array(sampledTraces[:minibatch_size]) # discard extra samples 59 | sampledTraces = np.reshape(sampledTraces,[minibatch_size*trace_length,-1]) 60 | return sampledTraces, true_trace_length 61 | -------------------------------------------------------------------------------- /Predator-Prey/agents/simple_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import config 3 | FLAGS = config.flags.FLAGS 4 | 5 | class RandomAgent(object): 6 | def __init__(self, action_dim): 7 | self._action_dim = action_dim 8 | 9 | def act(self, obs): 10 | 11 | if np.random.rand() < 3./8. : 12 | return 2 13 | else: 14 | return np.random.randint(self._action_dim) 15 | 16 | # return 2 17 | 18 | def train(self, minibatch, step): 19 | return 20 | 21 | class StaticAgent(object): 22 | def __init__(self, action): 23 | self._action = action 24 | 25 | def act(self, obs): 26 | return self._action 27 | 28 | def train(self, minibatch, step): 29 | return 30 | class ActiveAgent(object): 31 | def __init__(self, action_dim): 32 | self._action_dim = action_dim 33 | self._n_predator = FLAGS.n_predator 34 | self._n_prey = FLAGS.n_prey 35 | self._state_dim = 2 * (self._n_predator + self._n_prey) 36 | 37 | 38 | def act(self, state, num): 39 | state_i = self.state_to_index(state) 40 | # s = np.reshape(state_i, [self._state_dim/2, 2]) 41 | self.map_size = FLAGS.map_size 42 | threshold = self.map_size * 2.0 43 | i = self._n_predator + num 44 | action_i = 2 45 | if np.random.rand() < 1.0 : 46 | return np.random.randint(self._action_dim) 47 | pos_i = np.argwhere(np.array(state)==i+1)[0] 48 | for j in range(FLAGS.n_predator): 49 | pos_j = np.argwhere(np.array(state)==j+1)[0] 50 | if abs(pos_i[0] - pos_j[0]) + abs(pos_i[1] - pos_j[1]) < threshold: 51 | p = np.zeros(5) 52 | threshold = abs(pos_i[0] - pos_j[0]) + abs(pos_i[1] - pos_j[1]) 53 | if (pos_i[0] - pos_j[0]) >= abs((pos_i[1] - pos_j[1])): 54 | p[0] = 1 55 | elif (pos_i[1] - pos_j[1]) >= abs((pos_i[0] - pos_j[0])): 56 | p[1] = 1 57 | elif (pos_i[1] - pos_j[1]) <= -abs((pos_i[0] - pos_j[0])): 58 | p[3] = 1 59 | elif (pos_i[0] - pos_j[0]) <= -abs((pos_i[1] - pos_j[1])): 60 | p[4] = 1 61 | action_i = np.random.choice(self._action_dim, p=p/np.sum(p)) 62 | if threshold == 1: 63 | return 2 64 | return action_i 65 | 66 | def state_to_index(self, state): 67 | """ 68 | For the single agent case, the state is only related to the position of agent 1 69 | :param state: 70 | :return: 71 | """ 72 | # p1, p2 = self.get_predator_pos(state) 73 | ret = np.zeros(self._state_dim) 74 | for i in range(FLAGS.n_predator + FLAGS.n_prey): 75 | p = np.argwhere(np.array(state)==i+1)[0] 76 | #p = self.get_pos_by_id(state, i+1) 77 | ret[2 * i] = p[0] 78 | ret[2 * i + 1] = p[1] 79 | 80 | return ret 81 | -------------------------------------------------------------------------------- /Predator-Prey/config.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # coding=utf8 3 | 4 | import tensorflow as tf 5 | import logging 6 | import time 7 | import envs.config_env as config_env 8 | import agents.config_agents as config_agent 9 | 10 | flags = tf.flags 11 | 12 | flags.DEFINE_integer("seed", 0, "Random seed number") 13 | flags.DEFINE_string("folder", "default", "Result file folder name") 14 | flags.DEFINE_string("comment", "None", 15 | "Additional Comments") 16 | flags.DEFINE_boolean("gui", False, "Activate GUI") 17 | 18 | config_env.config_env(flags) 19 | config_agent.config_agent(flags) 20 | 21 | # Make result file with given filename 22 | now = time.localtime() 23 | s_time = "%02d%02d%02d%02d%02d" % (now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec) 24 | file_name = str(flags.FLAGS.n_predator) + "-" 25 | file_name += config_env.get_filename() + "-" + config_agent.get_filename() 26 | file_name += "-seed-"+str(flags.FLAGS.seed)+"-" + s_time + "-" + flags.FLAGS.comment 27 | result = logging.getLogger('Result') 28 | result.setLevel(logging.INFO) 29 | 30 | if flags.FLAGS.folder == "default": 31 | result_fh = logging.FileHandler("./results/eval/r-" + file_name + ".txt") 32 | nn_filename = "./results/nn/n-" + file_name 33 | tb_filename = "./results/board/tb-" + file_name 34 | else: 35 | result_fh = logging.FileHandler("./results/eval/"+ flags.FLAGS.folder +"/r-" + file_name + ".txt") 36 | nn_filename = "./results/nn/" + flags.FLAGS.folder + "/n-" + file_name 37 | tb_filename = "./results/board/" + flags.FLAGS.folder + "/tb-" + file_name 38 | 39 | result_fm = logging.Formatter('[%(filename)s:%(lineno)s] %(asctime)s\t%(message)s') 40 | result_fh.setFormatter(result_fm) 41 | result.addHandler(result_fh) 42 | 43 | # Used to map colors to integers 44 | COLOR_TO_IDX = { 45 | 'red' : 0, 46 | 'green' : 1, 47 | 'blue' : 2, 48 | 'purple': 3, 49 | 'yellow': 4, 50 | 'grey' : 5 51 | } 52 | 53 | IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys())) 54 | 55 | # Map of object type to integers 56 | OBJECT_TO_IDX = { 57 | 'empty' : 0, 58 | 'wall' : 1, 59 | 'agent' : 2, 60 | 'predator' : 3, 61 | 'prey' : 4, 62 | 'prey2' : 5 63 | } 64 | 65 | IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys())) -------------------------------------------------------------------------------- /Predator-Prey/envs/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | # from gym.envs.registration import register 3 | 4 | # Multiagent envs 5 | # ---------------------------------------- 6 | 7 | # register( 8 | # id='MultiagentSimple-v0', 9 | # entry_point='multiagent.envs:SimpleEnv', 10 | # # FIXME(cathywu) currently has to be exactly max_path_length parameters in 11 | # # rllab run script 12 | # max_episode_steps=100, 13 | # ) 14 | 15 | # register( 16 | # id='MultiagentSimpleSpeakerListener-v0', 17 | # entry_point='multiagent.envs:SimpleSpeakerListenerEnv', 18 | # max_episode_steps=100, 19 | # ) 20 | 21 | logger_agent = logging.getLogger('GridMARL') 22 | logger_agent.setLevel(logging.INFO) 23 | 24 | fm = logging.Formatter('[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > [%(name)s] %(message)s') 25 | sh = logging.StreamHandler() 26 | sh.setFormatter(fm) 27 | logger_agent.addHandler(sh) 28 | 29 | # fh_agent = logging.FileHandler('./agent.log') 30 | # fh_agent.setFormatter(fm) 31 | # logger_agent.addHandler(fh_agent) 32 | -------------------------------------------------------------------------------- /Predator-Prey/envs/config_env.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | 4 | 5 | 6 | def config_env(_flags): 7 | flags = _flags 8 | 9 | 10 | # Scenario 11 | flags.DEFINE_string("scenario", "pursuit", "Scenario") 12 | flags.DEFINE_integer("n_predator", 2, "Number of predators") 13 | flags.DEFINE_integer("n_prey1", 1, "Number of preys 1") 14 | flags.DEFINE_integer("n_prey2", 1, "Number of preys 2") 15 | flags.DEFINE_integer("n_prey", 2, "Number of preys") 16 | # Observation 17 | flags.DEFINE_integer("history_len", 1, "How many previous steps we look back") 18 | 19 | # core 20 | flags.DEFINE_integer("map_size", 3, "Size of the map") 21 | flags.DEFINE_float("render_every", 1000, "Render the nth episode") 22 | 23 | # Penalty 24 | flags.DEFINE_integer("penalty", 1, "reward penalty") 25 | 26 | def get_filename(): 27 | import config 28 | FLAGS = config.flags.FLAGS 29 | 30 | return "s-"+FLAGS.scenario+"-map-"+str(FLAGS.map_size)+"-penalty-"+str(FLAGS.penalty) -------------------------------------------------------------------------------- /Predator-Prey/envs/environment.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | import numpy as np 4 | 5 | # environment for all agents in the multiagent world 6 | # currently code assumes that no agents will be created/destroyed at runtime! 7 | 8 | 9 | class MultiAgentEnv(gym.Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'] 12 | } 13 | 14 | def __init__(self, world, reset_callback=None, reward_callback=None, 15 | observation_callback=None, info_callback=None, 16 | done_callback=None, shared_viewer=True): 17 | 18 | self.world = world 19 | self.agents = self.world.agents 20 | # set required vectorized gym env property 21 | self.n = len(world.agents) 22 | # scenario callbacks 23 | self.reset_callback = reset_callback 24 | self.reward_callback = reward_callback 25 | self.observation_callback = observation_callback 26 | self.info_callback = info_callback 27 | self.done_callback = done_callback 28 | 29 | # environment parameters 30 | self.discrete_comm_space = True 31 | self.time = 0 32 | 33 | # configure spaces 34 | self.action_space = [] 35 | self.observation_space = [] 36 | self.agent_precedence = [] 37 | for agent in self.agents: 38 | self.agent_precedence.append(agent.itype) 39 | total_action_space = [] 40 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 41 | total_action_space.append(u_action_space) 42 | 43 | # communication action space 44 | if self.discrete_comm_space: 45 | c_action_space = spaces.Discrete(world.dim_c) 46 | else: 47 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,)) 48 | 49 | if not agent.silent: 50 | total_action_space.append(c_action_space) 51 | 52 | # total action space 53 | if len(total_action_space) > 1: 54 | # all action spaces are discrete, so simplify to MultiDiscrete action space 55 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 56 | act_space = spaces.MultiDiscrete([act_space.n for act_space in total_action_space]) 57 | else: 58 | act_space = spaces.Tuple(total_action_space) 59 | self.action_space.append(act_space) 60 | else: 61 | self.action_space.append(total_action_space[0]) 62 | 63 | # observation space 64 | obs_dim = len(observation_callback(agent, self.world).flatten()) 65 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,))) 66 | agent.action.c = 
np.zeros(self.world.dim_c) 67 | 68 | def get_agent_profile(self): 69 | agent_profile = {} 70 | 71 | for i, agent in enumerate(self.agents): 72 | if agent.itype in agent_profile: 73 | agent_profile[agent.itype]['n_agent'] += 1 74 | agent_profile[agent.itype]['idx'].append(i) 75 | else: 76 | if isinstance(self.action_space[i], spaces.Discrete): 77 | act_space = self.action_space[i].n 78 | com_space = 0 79 | else: 80 | act_space = self.action_space[i].nvec[0] 81 | com_space = self.action_space[i].nvec[1] 82 | 83 | agent_profile[agent.itype] = { 84 | 'n_agent': 1, 85 | 'idx': [i], 86 | 'act_dim': act_space, 87 | 'com_dim': com_space, 88 | 'obs_dim': self.observation_space[i].shape 89 | } 90 | 91 | return agent_profile 92 | 93 | def step(self, action_n): 94 | obs_n = [] 95 | reward_n = [] 96 | done_n = [] 97 | reset_n = [] 98 | info_n = {'n': []} 99 | 100 | self.agents = self.world.agents 101 | self.world.step(action_n) 102 | 103 | for agent in self.agents: 104 | reward_n.append(self._get_reward(agent)) 105 | 106 | for agent in self.agents: 107 | info_n['n'].append(self._get_info(agent)) 108 | 109 | for agent in self.agents: 110 | reset_n.append(self._get_done(agent)) 111 | # done_n.append(self._get_done(agent)) 112 | 113 | i = 0 114 | for agent in self.agents: 115 | if reset_n[i] == True: 116 | self.world.resetObj(agent) 117 | i += 1 118 | for agent in self.agents: 119 | done_n.append(False) 120 | 121 | for agent in self.agents: 122 | obs_n.append(self._get_obs(agent)) 123 | 124 | return obs_n, reward_n, done_n, info_n 125 | 126 | def reset(self): 127 | # reset world 128 | self.reset_callback(self.world) 129 | 130 | obs_n = [] 131 | for agent in self.agents: 132 | obs_n.append(self._get_obs(agent)) 133 | return obs_n 134 | 135 | # get info used for benchmarking 136 | def _get_info(self, agent): 137 | if self.info_callback is None: 138 | return {} 139 | return self.info_callback(agent, self.world) 140 | 141 | # get observation for a particular agent 142 | def _get_obs(self, agent): 143 | if self.observation_callback is None: 144 | return np.zeros(0) 145 | return self.observation_callback(agent, self.world) 146 | 147 | # get dones for a particular agent 148 | # unused right now -- agents are allowed to go beyond the viewing screen 149 | def _get_done(self, agent): 150 | if self.done_callback is None: 151 | return False 152 | return self.done_callback(agent, self.world) 153 | 154 | def _get_done(self, agent): 155 | if self.done_callback is None: 156 | return False 157 | return self.done_callback(agent, self.world) 158 | 159 | # get reward for a particular agent 160 | def _get_reward(self, agent): 161 | if self.reward_callback is None: 162 | return 0.0 163 | return self.reward_callback(agent, self.world) 164 | 165 | def get_full_encoding(self): 166 | return self.world.get_full_encoding() -------------------------------------------------------------------------------- /Predator-Prey/envs/grid_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import config 3 | 4 | 5 | COLOR_TO_IDX = config.COLOR_TO_IDX 6 | OBJECT_TO_IDX = config.OBJECT_TO_IDX 7 | 8 | N = 0 9 | E = 1 10 | O = 2 11 | W = 3 12 | S = 4 13 | 14 | # action of the agent 15 | class Action(object): 16 | def __init__(self): 17 | # physical action 18 | self.u = None 19 | # communication action 20 | self.c = None 21 | 22 | # properties and state of physical world entity 23 | class Entity(object): 24 | def __init__(self, itype, color): 25 | assert itype in OBJECT_TO_IDX, itype 26 
| assert color in COLOR_TO_IDX, color 27 | self.itype = itype 28 | self.color = color 29 | self.contains = None 30 | 31 | # name 32 | self.name = '' 33 | # properties: 34 | self.movable = False 35 | # entity collides with others 36 | self.collide = True 37 | # material density (affects mass) 38 | self.density = 25.0 39 | 40 | @property 41 | def pos(self): 42 | return self._x, self._y 43 | 44 | def set_pos(self, x, y): 45 | self._x = x 46 | self._y = y 47 | 48 | # properties of agent entities 49 | class CoreAgent(Entity): 50 | def __init__(self, itype='agent', color='green'): 51 | super(CoreAgent, self).__init__(itype, color) 52 | self.name = "" 53 | # agents are movable by default 54 | self.movable = True 55 | # cannot send communication signals 56 | self.silent = True 57 | # action 58 | self.action = Action() 59 | # if waiting for other agents action 60 | self.waiting = False 61 | # if done doing its action in the current step 62 | self.done_moving = False 63 | # if the intended step collided 64 | self.collided = False 65 | 66 | self._obs = None 67 | self._x = 0 68 | self._y = 0 69 | self.obs_range = 1 70 | 71 | def update_obs(self, obs): 72 | self._obs = obs 73 | 74 | def get_obs(self): 75 | return self._obs 76 | 77 | class Wall(Entity): 78 | def __init__(self, color='grey'): 79 | super(Wall, self).__init__('wall', color) 80 | 81 | class Grid(object): 82 | """ 83 | Represent a grid and operations on it 84 | """ 85 | 86 | def __init__(self, width, height): 87 | assert width >= 2 88 | assert height >= 2 89 | 90 | self.width = width 91 | self.height = height 92 | self.reset() 93 | 94 | def set(self, i, j, v): 95 | assert i >= 0 and i < self.width 96 | assert j >= 0 and j < self.height 97 | self.grid[j * self.width + i] = v 98 | 99 | def get(self, i, j): 100 | if ((i >= 0 and i < self.width) and \ 101 | (j >= 0 and j < self.height)): 102 | return self.grid[j * self.width + i] 103 | 104 | return Wall() 105 | 106 | def reset(self): 107 | self.grid = [None] * self.width * self.height 108 | 109 | def setHorzWall(self, x, y, length=None): 110 | if length is None: 111 | length = self.width - x 112 | for i in range(0, length): 113 | self.set(x + i, y, Wall()) 114 | 115 | def setVertWall(self, x, y, length=None): 116 | if length is None: 117 | length = self.height - y 118 | for j in range(0, length): 119 | self.set(x, y + j, Wall()) 120 | 121 | def wallRect(self, x, y, w, h): 122 | self.setHorzWall(x, y, w) 123 | self.setHorzWall(x, y+h-1, w) 124 | self.setVertWall(x, y, h) 125 | self.setVertWall(x+w-1, y, h) 126 | 127 | def slice(self, topX, topY, width, height): 128 | """ 129 | Get a subset of the grid 130 | """ 131 | 132 | grid = Grid(width, height) 133 | 134 | for j in range(0, height): 135 | for i in range(0, width): 136 | x = topX + i 137 | y = topY + j 138 | 139 | if x >= 0 and x < self.width and \ 140 | y >= 0 and y < self.height: 141 | v = self.get(x, y) 142 | else: 143 | v = Wall() 144 | 145 | grid.set(i, j, v) 146 | 147 | return grid 148 | 149 | def encode(self): 150 | """ 151 | Produce a compact numpy encoding of the grid 152 | """ 153 | 154 | array = np.zeros(shape=(self.height, self.width, 3), dtype=np.int8) 155 | 156 | for j in range(0, self.height): 157 | for i in range(0, self.width): 158 | 159 | v = self.get(i, j) 160 | if isinstance(v, CoreAgent): 161 | array[j, i, 2] = v.id 162 | 163 | if v == None: 164 | continue 165 | 166 | 167 | array[j, i, 0] = OBJECT_TO_IDX[v.itype] 168 | array[j, i, 1] = COLOR_TO_IDX[v.color] 169 | 170 | return array 171 | 172 | # multi-agent world 173 | 
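# Summary of the World mechanics implemented in the methods below:
#  * step(action_n) assigns one discrete action per agent (N, E, O, W, S, where
#    O leaves the agent in place) and then moves the agents one at a time via
#    single_agent_step().
#  * If an agent's target cell holds another agent, it waits, lets that agent
#    take its own move first, and re-checks; if the cell is still occupied the
#    move is dropped and the agent's `collided` flag is set.
#  * After all moves, set_observations() gives every agent the encoded
#    (2*obs_range+1) x (2*obs_range+1) slice of the grid centred on its position.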
class World(object): 174 | def __init__(self, width, height): 175 | # list of agents and entities (can change at execution-time!) 176 | self.agents = [] 177 | 178 | # communication channel dimensionality 179 | self.dim_c = 0 180 | # position dimensionality 181 | self.dim_p = 2 182 | 183 | self.width = width 184 | self.height = height 185 | 186 | self.grid = Grid(self.width, self.height) 187 | self.grid.wallRect(0, 0, self.width, self.height) 188 | 189 | self.step_cnt = 0 190 | 191 | def empty_grid(self): 192 | self.step_cnt = 0 193 | self.grid.reset() 194 | 195 | def placeObj(self, obj, top=None, size=None, reject_fn=None): 196 | """ 197 | Place an object at an empty position in the grid 198 | 199 | :param top: top-left position of the rectangle where to place 200 | :param size: size of the rectangle where to randomly place 201 | :param reject_fn: function to filter out potential positions 202 | """ 203 | 204 | if top is None: 205 | top = (0, 0) 206 | 207 | if size is None: 208 | size = (self.grid.width, self.grid.height) 209 | 210 | while True: 211 | pos = ( 212 | np.random.randint(top[0], top[0] + size[0]), 213 | np.random.randint(top[1], top[1] + size[1]) 214 | ) 215 | 216 | # Don't place the object on top of another object 217 | if self.grid.get(*pos) != None: 218 | continue 219 | 220 | # Check if there is a filtering criterion 221 | if reject_fn and reject_fn(self, pos): 222 | continue 223 | 224 | break 225 | 226 | self.grid.set(pos[0], pos[1], obj) 227 | obj.set_pos(pos[0], pos[1]) 228 | return pos 229 | 230 | def resetObj(self, obj, top=None, size=None, reject_fn=None): 231 | """ 232 | Reset an object at an empty position in the grid 233 | 234 | :param top: top-left position of the rectangle where to place 235 | :param size: size of the rectangle where to randomly place 236 | :param reject_fn: function to filter out potential positions 237 | """ 238 | if top is None: 239 | top = (0, 0) 240 | 241 | if size is None: 242 | size = (self.grid.width, self.grid.height) 243 | 244 | while True: 245 | pos = ( 246 | np.random.randint(top[0], top[0] + size[0]), 247 | np.random.randint(top[1], top[1] + size[1]) 248 | ) 249 | 250 | # Don't place the object on top of another object 251 | if self.grid.get(*pos) != None: 252 | continue 253 | 254 | # Check if there is a filtering criterion 255 | if reject_fn and reject_fn(self, pos): 256 | continue 257 | 258 | break 259 | x, y = obj.pos 260 | self.grid.set(x, y, None) 261 | self.grid.set(pos[0], pos[1], obj) 262 | obj.set_pos(pos[0], pos[1]) 263 | return pos 264 | 265 | def single_agent_step(self, agent, action): 266 | if agent.done_moving or agent.waiting: 267 | return 268 | 269 | x, y = agent.pos 270 | action = agent.action.u 271 | 272 | if action == N: 273 | y -= 1 274 | elif action == E: 275 | x -= 1 276 | elif action == W: 277 | x += 1 278 | elif action == S: 279 | y += 1 280 | elif action == O: 281 | agent.done_moving = True 282 | agent.collided = False 283 | return 284 | 285 | intended_cell = self.grid.get(x, y) 286 | if isinstance(intended_cell, CoreAgent): 287 | agent.waiting = True 288 | # let the other agent move first 289 | self.single_agent_step(intended_cell, intended_cell.action.u) 290 | agent.waiting = False 291 | # get the intended cell (to check if it is empty) 292 | intended_cell = self.grid.get(x, y) 293 | 294 | # check if the intended cell is empty 295 | if not intended_cell is None: 296 | agent.collided = True 297 | else: 298 | x_0, y_0 = agent.pos 299 | self.grid.set(x_0, y_0, None) 300 | self.grid.set(x, y, agent) 301 | 
agent.set_pos(x, y) 302 | agent.collided = False 303 | 304 | agent.done_moving = True 305 | 306 | # update state of the world 307 | def step(self, action_n): 308 | self.step_cnt += 1 309 | # set the action 310 | for i, agent in enumerate(self.agents): 311 | agent.action.u = action_n[i] 312 | agent.done_moving = False 313 | 314 | # do the action 315 | for agent in self.agents: 316 | self.single_agent_step(agent, agent.action.u) 317 | 318 | # update observations of all agents 319 | self.set_observations() 320 | 321 | def set_observations(self): 322 | for agent in self.agents: 323 | x, y = agent.pos 324 | r = agent.obs_range 325 | obs = self.grid.slice(x-r, y-r,r*2+1,r*2+1) 326 | agent.update_obs(obs.encode()) 327 | 328 | def get_full_encoding(self): 329 | return self.grid.encode() -------------------------------------------------------------------------------- /Predator-Prey/envs/gui/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sonkyunghwan/QTRAN/785c44ebc8379896dc9f513af2ac767d61013914/Predator-Prey/envs/gui/__init__.py -------------------------------------------------------------------------------- /Predator-Prey/envs/gui/canvas.py: -------------------------------------------------------------------------------- 1 | 2 | # --------------------------------------------------------------- 3 | # Display on GUI the positions of predator agents and prey agents 4 | # --------------------------------------------------------------- 5 | 6 | import random 7 | import socket 8 | import threading 9 | import json 10 | import pygame 11 | import ConfigParser 12 | 13 | from time import sleep 14 | from math import pi, sin, cos, sqrt, ceil, floor 15 | from envs.gui.guiObjects import guiPred, guiPrey # no need for camera view 16 | 17 | GREY = (25, 25, 25, 128) 18 | WHITE = (255, 255, 255, 0) 19 | ORANGE = (255, 100, 0, 128) 20 | RED = (255, 0, 0) 21 | GREEN = (0, 153, 76, 128) 22 | 23 | edge_len_pix = 960 # This is the length of each edge in pixels 24 | # Note that we only consider square maps, which will be drawn on square PyGame surfaces 25 | 26 | # # Toy parameters (not yet discretized into the grid-world setting) 27 | # positions = [12, 245, 1003, 298, 933, 393, 1100, 28, 222, 353] 28 | # schedule = [0, 1, 0] 29 | 30 | # Toy parameters (now discretized into the grid-world setting) 31 | positions = [0, 1, 3, 4, 4, 1, 2, 4, 3, 1] 32 | schedule = [0, 1, 0] 33 | 34 | class Canvas(): 35 | def __init__(self, num_pred = 3, num_prey = 2, map_size = 5): 36 | # Take resolution and number of trackers as argument 37 | 38 | self.num_pred = num_pred 39 | self.num_prey = num_prey 40 | self.map_size = map_size 41 | 42 | # Define grid locator parameter 43 | # Since we're dealing with a square grid-world, 44 | # one locator is enough for both the x- and y-coordinates 45 | 46 | self.locator = int(edge_len_pix/self.map_size) 47 | 48 | # --- Some PyGame-related initialization --- 49 | pygame.init() 50 | self.clock = pygame.time.Clock() 51 | self.display_surface = pygame.display.set_mode((edge_len_pix, edge_len_pix)) 52 | pygame.display.set_caption("Predator Prey Simulator") 53 | self.movable_surface = pygame.Surface((edge_len_pix, edge_len_pix)) 54 | self.message_surface = pygame.Surface((edge_len_pix, 32), pygame.SRCALPHA) 55 | self.message_surface = self.message_surface.convert_alpha() 56 | self.done_surface = pygame.Surface((edge_len_pix, edge_len_pix), pygame.SRCALPHA) 57 | self.done_surface = self.done_surface.convert_alpha() 58 | self.mx = 
self.movable_surface.get_width() 59 | self.my = self.movable_surface.get_height() 60 | self.done = False 61 | 62 | # --- Diplay screen resolution --- 63 | # For displaying the message from the learning module 64 | self.fs = 32 65 | self.font = pygame.font.SysFont(pygame.font.get_default_font(), self.fs) 66 | 67 | # Frame is fixed 68 | self.framex = edge_len_pix 69 | self.framey = edge_len_pix 70 | 71 | # Movable surface is variable 72 | self.wx = self.mx 73 | self.wy = self.my 74 | self.zoom_sensitivity = 1.02 # Change this to zoom faster 75 | self.pan_sensitivity = 5 # Change this to move screen faster 76 | self.sx = 0 77 | self.sy = 0 78 | 79 | # --- Testing for scroll --- 80 | self.tx = 0 81 | self.ty = 0 82 | 83 | self.center_mark_size_px = 10 84 | self.center_mark_thickness_px = 1 85 | self.button_size_px = 50 86 | 87 | self.guiObjectsList = [] 88 | 89 | # Some viewing margin for the button spacing 90 | self.vmargin = 5 91 | 92 | # Correctors for intuitive viewing 93 | self.angle_corrector = 90 94 | self.x_corrector = self.mx/2 95 | self.y_corrector = self.my/2 96 | self.cam_view_scaler = 2 97 | 98 | self.button_value = 2 99 | 100 | def setup(self): 101 | 102 | # --- guiObjects setup --- 103 | # Randomly positioned for now... get real values later 104 | self.target_cnt = self.num_prey # Allow only one target 105 | self.target_size_px = 20 # The size of the target in pixels 106 | 107 | self.btn_pause_surface = pygame.Surface((self.button_size_px, self.button_size_px), pygame.SRCALPHA) 108 | self.btn_pause_surface = self.btn_pause_surface.convert_alpha() 109 | self.btn_pause_surface.fill(WHITE) 110 | 111 | self.btn_play_surface = pygame.Surface((self.button_size_px, self.button_size_px), pygame.SRCALPHA) 112 | self.btn_play_surface = self.btn_play_surface.convert_alpha() 113 | self.btn_play_surface.fill(WHITE) 114 | 115 | self.btn_ff_surface = pygame.Surface((self.button_size_px, self.button_size_px), pygame.SRCALPHA) 116 | self.btn_ff_surface = self.btn_ff_surface.convert_alpha() 117 | self.btn_ff_surface.fill(WHITE) 118 | 119 | self.button_press_reactor = {"pause":0, "play":0, "ff":0} 120 | 121 | 122 | 123 | # Append the predators first and then the preys 124 | for i in range(self.num_pred): 125 | self.pred = guiPred(pred_id = i) 126 | self.pred.setup() 127 | self.guiObjectsList.append(self.pred) 128 | 129 | for j in range(self.num_prey): 130 | self.prey = guiPrey(prey_id = j) 131 | self.prey.setup() 132 | self.guiObjectsList.append(self.prey) 133 | # guiObjectsList looks like this [pred0, pred1, ..., pred(num_pred-1), prey0, prey1, ..., prey(num_prey-1)] 134 | 135 | 136 | def button(self, text, bx, by, bw, bh, ac, ic, surface): 137 | mouse = pygame.mouse.get_pos() 138 | click = pygame.mouse.get_pressed() 139 | if bx + bw > mouse[0] > bx and by + bh > mouse[1] > by: 140 | pygame.draw.rect(surface, ac, (bx, by, self.button_size_px, self.button_size_px)) 141 | if click[0] == 1: 142 | pygame.draw.rect(surface, (255, 255, 0, 128), (bx, by, self.button_size_px, self.button_size_px)) 143 | else: 144 | pygame.draw.rect(surface, ic, (bx, by, self.button_size_px, self.button_size_px)) 145 | 146 | button_font = pygame.font.SysFont(pygame.font.get_default_font(), 20) 147 | button_label = button_font.render(text, True, (0, 0, 0)) 148 | surface.blit(button_label, (self.button_size_px/2 - button_font.size(text)[0]/2, self.button_size_px/2 - button_font.size(text)[1]/2)) 149 | self.display_surface.blit(surface, (bx, by)) 150 | 151 | def make_border(self, obj): 152 | 153 | 
pygame.draw.rect(obj.surface, obj.border_color, [0, 0, obj.sy, obj.border_thickness]) 154 | pygame.draw.rect(obj.surface, obj.border_color, [0, obj.sy - obj.border_thickness, obj.sy, obj.border_thickness]) 155 | pygame.draw.rect(obj.surface, obj.border_color, [0, 0, obj.border_thickness, obj.sy]) 156 | pygame.draw.rect(obj.surface, obj.border_color, [obj.sx - obj.border_thickness, 0, obj.border_thickness, obj.sy]) 157 | 158 | def draw(self, positions, schedule, msg=None, done=False): 159 | # positions is a list of x, y describing the x, y coordinates of each agent 160 | # schedule is a list whose elements are either 0 or 1, signifying that the agent is scheduled (1) or not (0) 161 | # while not self.done: 162 | mouse_pos = pygame.mouse.get_pos() 163 | for event in pygame.event.get(): 164 | if ((event.type == pygame.QUIT) or ((event.type == pygame.KEYDOWN) and (event.key == pygame.K_q))): 165 | self.done = True 166 | if event.type == pygame.MOUSEBUTTONDOWN: 167 | 168 | # --- Buttons --- 169 | if self.framey - self.button_size_px - self.vmargin < mouse_pos[1] < self.framey - self.vmargin: 170 | # Pause button 171 | if self.vmargin < mouse_pos[0] < self.vmargin + self.button_size_px: 172 | if event.type == pygame.MOUSEBUTTONDOWN: 173 | self.button_press_reactor["pause"] = min(255, self.button_press_reactor["pause"] + 200) 174 | # sent = self.conn.send("pause") # TODO 175 | self.button_value = 0 176 | 177 | 178 | # Play button 179 | if 2*self.vmargin + self.button_size_px < mouse_pos[0] < 2*self.vmargin + 2*self.button_size_px: 180 | if event.type == pygame.MOUSEBUTTONDOWN: 181 | self.button_press_reactor["play"] = min(255, self.button_press_reactor["play"] + 200) 182 | # sent = self.conn.send("play") # TODO 183 | self.button_value = 1 184 | 185 | 186 | # Fast-forward button 187 | if 3*self.vmargin + 2*self.button_size_px < mouse_pos[0] < 3*self.vmargin + 3*self.button_size_px: 188 | if event.type == pygame.MOUSEBUTTONDOWN: 189 | self.button_press_reactor["ff"] = min(255, self.button_press_reactor["ff"] + 200) 190 | # sent = self.conn.send("ff") # TODO 191 | self.button_value = 2 192 | 193 | if event.type == pygame.MOUSEBUTTONDOWN: 194 | if event.button == 4: 195 | self.wx *= self.zoom_sensitivity 196 | self.wy *= self.zoom_sensitivity 197 | if event.button == 5: 198 | self.wx /= self.zoom_sensitivity 199 | self.wy /= self.zoom_sensitivity 200 | 201 | pressed = pygame.key.get_pressed() 202 | if pressed[pygame.K_w]: self.sy += self.pan_sensitivity 203 | if pressed[pygame.K_s]: self.sy -= self.pan_sensitivity 204 | if pressed[pygame.K_a]: self.sx += self.pan_sensitivity 205 | if pressed[pygame.K_d]: self.sx -= self.pan_sensitivity 206 | 207 | # --- Fill background --- 208 | self.display_surface.fill(GREY) 209 | self.movable_surface.fill((255, 255, 255, 0)) 210 | self.message_surface.fill(GREY) 211 | self.done_surface.fill((255, 255, 0, 128)) 212 | self.btn_pause_surface.fill((0+self.button_press_reactor["pause"], 153, 76, 128)) 213 | self.btn_play_surface.fill((0+self.button_press_reactor["play"], 153, 76, 128)) 214 | self.btn_ff_surface.fill((0+self.button_press_reactor["ff"], 153, 76, 128)) 215 | 216 | for button in self.button_press_reactor: 217 | self.button_press_reactor[button] = max(0, self.button_press_reactor[button]-1) 218 | 219 | 220 | 221 | # --- Position update ---------------------------------------------- 222 | # Call some get_pos() function here by asking the Environment 223 | # Then, update the guiObjects' positions accordingly 224 | 225 | # RECV_UPDATE() function runs on its 
own thread now. 226 | # This is to accept asynchronous inputs from 227 | # (i) remote server and (ii) local keyboard input for zooming/panning. 228 | 229 | for obj in self.guiObjectsList: 230 | # Fill the surface of target and drone objects 231 | if (("predator" in obj.name) or (obj.name == "prey")): 232 | obj.surface.fill(WHITE) 233 | 234 | # --- guiObject update --- 235 | 236 | # Re-draw target circle 237 | cnt = 0 238 | for obj in self.guiObjectsList: 239 | if obj.name == "prey": 240 | obj.surface = pygame.transform.scale(obj.surface, (int(2*obj.z), int(2*obj.z))) 241 | if schedule[cnt] == True: 242 | pygame.draw.circle(obj.surface, obj.eye_color, (int(obj.z), int(obj.z)), int(obj.z), 0) 243 | else: 244 | 245 | pygame.draw.circle(obj.surface, obj.color, (int(obj.z), int(obj.z)), int(obj.z), 0) 246 | # if schedule[cnt] == True: 247 | # pygame.draw.circle(obj.surface, obj.eye_color, (int(obj.z), int(obj.z)), int(obj.z), 3) 248 | cnt += 1 249 | 250 | # Re-draw drone objects 251 | cnt = 0 252 | for obj in self.guiObjectsList: 253 | if "predator" in obj.name: 254 | # Re-scale each surface so that each guiObject can fit in it 255 | obj.surface = pygame.transform.scale(obj.surface, (int(2*obj.z), int(2*obj.z))) 256 | 257 | # Re-draw objects according to z-coordinate (their size will vary) 258 | pygame.draw.circle(obj.surface, obj.body_color, (int(obj.z), int(obj.z)), int(obj.z), 0) 259 | 260 | # if schedule[cnt] == 1: 261 | # pygame.draw.circle(obj.surface, obj.eye_color, (int(obj.z), int(obj.z)), int(obj.z), 3) 262 | cnt += 1 263 | # --- Canvas update --- 264 | # Re-drawing is called "blitting"! 265 | 266 | # Blit hierarchy follows this order: 267 | # [BOTTOM LEVEL] <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< [TOP LEVEL] 268 | # guiObject.label <<< guiObject.surface <<< movable_surface <<< display_surface 269 | 270 | # guiObject.surface.blit(guiObject.label, [position]) : write label on object's surface 271 | # movable_surface.blit(guiObject.surface, [position]) : draw object's surface onto movable surface 272 | # display_surface.blit(movable_surface, [position]) : draw movable surface onto a position-fixed display surface 273 | 274 | # Bottom-level blit 275 | for guiObject in self.guiObjectsList: 276 | # Target 277 | if guiObject.name == "prey": 278 | guiObject.surface.blit(guiObject.label, (int(guiObject.sx/2 - guiObject.font.size(guiObject.text)[0]/2), int(guiObject.sy/2 - guiObject.font.size(guiObject.text)[1]/2))) 279 | 280 | # Drones 281 | elif "predator" in guiObject.name: 282 | guiObject.surface.blit(guiObject.label, (int(guiObject.z - guiObject.font.size(guiObject.text)[0]/2), int(guiObject.z - guiObject.font.size(guiObject.text)[1]/2))) 283 | 284 | # Note that we no longer have updates from the socket 285 | # Now we have positions updated directly from the vector POSITIONS 286 | # Positions update before blitting onto the movable surface 287 | for i in range(len(self.guiObjectsList)): 288 | self.guiObjectsList[i].x = positions[2*i]*self.locator + int(self.locator/2) 289 | self.guiObjectsList[i].y = positions[2*i+1]*self.locator + int(self.locator/2) 290 | 291 | # Mid-level blit 292 | 293 | # Writing the message onto the message surface 294 | self.text = str(msg) 295 | self.label = self.font.render(self.text, True, (255, 255, 255)) 296 | self.message_surface.blit(self.label, (8, 4)) 297 | 298 | # Draw the grid lines 299 | for i in range(self.map_size): 300 | pygame.draw.line(self.movable_surface, GREY, (i*self.locator, 0), (i*self.locator, edge_len_pix)) 301 | 
pygame.draw.line(self.movable_surface, GREY, (0, i*self.locator), (edge_len_pix, i*self.locator)) 302 | 303 | for obj in self.guiObjectsList: 304 | # Target and Drones 305 | if obj.name == "prey": 306 | self.movable_surface.blit(obj.surface, (int(obj.x - obj.z), int(obj.y - obj.z))) 307 | elif "predator" in obj.name: 308 | self.movable_surface.blit(obj.surface, (int(obj.x - obj.z), int(obj.y - obj.z))) 309 | elif (obj.name == "center"): 310 | self.movable_surface.blit(obj.surface, (int(self.framex/2 - obj.sx/2), int(self.framey/2 - obj.sy/2))) 311 | 312 | # Top-level blit 313 | 314 | # Blitting the movable surface onto the display surface 315 | self.display_surface.blit(pygame.transform.scale(self.movable_surface, (int(self.wx), int(self.wy))), (int((self.framex - self.wx)/2 + self.sx), int((self.framey - self.wy)/2 + self.sy))) 316 | 317 | # Blitting the message surface onto the display surface 318 | self.display_surface.blit(self.message_surface, (0, 0)) 319 | 320 | if done: 321 | self.display_surface.blit(self.done_surface, (0, 0)) 322 | 323 | # Re-draw buttons 324 | self.button("PAUSE", self.vmargin, self.framey - self.vmargin - self.button_size_px, self.button_size_px, self.button_size_px, (0, 255, 0, 128), GREEN, self.btn_pause_surface) 325 | self.button("PLAY", 2*self.vmargin + self.button_size_px, self.framey - self.vmargin - self.button_size_px, self.button_size_px, self.button_size_px, (0, 255, 0, 128), GREEN, self.btn_play_surface) 326 | self.button("FF", 3*self.vmargin + 2*self.button_size_px, self.framey - self.vmargin - self.button_size_px, self.button_size_px, self.button_size_px, (0, 255, 0, 128), GREEN, self.btn_ff_surface) 327 | 328 | pygame.display.update() 329 | 330 | 331 | if self.button_value == 1: 332 | sleep(0.5) 333 | 334 | elif self.button_value == 0: 335 | self.button_value = 3 336 | while self.button_value == 3: 337 | sleep(0.1) 338 | mouse_pos = pygame.mouse.get_pos() 339 | for event in pygame.event.get(): 340 | if event.type == pygame.MOUSEBUTTONDOWN: 341 | 342 | # --- Buttons --- 343 | if self.framey - self.button_size_px - self.vmargin < mouse_pos[1] < self.framey - self.vmargin: 344 | # Pause button 345 | if self.vmargin < mouse_pos[0] < self.vmargin + self.button_size_px: 346 | if event.type == pygame.MOUSEBUTTONDOWN: 347 | self.button_press_reactor["pause"] = min(255, self.button_press_reactor["pause"] + 200) 348 | # sent = self.conn.send("pause") # TODO 349 | self.button_value = 0 350 | 351 | # Play button 352 | if 2 * self.vmargin + self.button_size_px < mouse_pos[ 353 | 0] < 2 * self.vmargin + 2 * self.button_size_px: 354 | if event.type == pygame.MOUSEBUTTONDOWN: 355 | self.button_press_reactor["play"] = min(255, self.button_press_reactor["play"] + 200) 356 | # sent = self.conn.send("play") # TODO 357 | self.button_value = 1 358 | 359 | # Fast-forward button 360 | if 3 * self.vmargin + 2 * self.button_size_px < mouse_pos[ 361 | 0] < 3 * self.vmargin + 3 * self.button_size_px: 362 | if event.type == pygame.MOUSEBUTTONDOWN: 363 | self.button_press_reactor["ff"] = min(255, self.button_press_reactor["ff"] + 200) 364 | # sent = self.conn.send("ff") # TODO 365 | self.button_value = 2 366 | 367 | 368 | return 0 369 | 370 | 371 | 372 | 373 | 374 | 375 | if __name__ == "__main__": 376 | canvas = Canvas(3, 2, 8) 377 | canvas.setup() 378 | canvas.draw(positions, schedule) 379 | 380 | -------------------------------------------------------------------------------- /Predator-Prey/envs/gui/guiObjects.py: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | # ---------------------------------------------------------------- 4 | # Purpose of Simple Simulator guiObjects is to provide abstraction 5 | # for the drawn target and drones on the Simple Simulator Canvas. 6 | # To use guiObjects, make an instance and blit it on a surface. 7 | # ---------------------------------------------------------------- 8 | 9 | import pygame 10 | from math import cos, sin, pi 11 | WHITE = (255, 255, 255) 12 | BLACK = (0, 0, 0) 13 | ORANGE = (255, 100, 0, 128) 14 | BLUE = (0, 128, 255, 128) 15 | RED = (255, 0, 0, 128) 16 | 17 | class guiPrey(): 18 | def __init__(self, xi = 1, yi = 1, zi = 30, ai = 10, prey_id = 0): 19 | # Take initial (x, y, z, radius) as argument 20 | self.x = xi 21 | self.y = yi 22 | self.z = zi 23 | # self.tr = tr 24 | self.a = ai 25 | self.a = None # Not used 26 | self.prey_id = prey_id 27 | self.name = "prey" 28 | 29 | # Label the prey 30 | self.color = ORANGE 31 | self.eye_color = RED 32 | self.fs = 15 # font size 33 | self.font = pygame.font.SysFont(pygame.font.get_default_font(), self.fs) 34 | self.text = "" 35 | self.label = self.font.render(self.text, True, BLACK) 36 | 37 | def setup(self, sx = 25, sy = 25): 38 | # Set up prey's surface 39 | self.sx = sx 40 | self.sy = sy 41 | self.surface = pygame.Surface((self.sx, self.sy), pygame.SRCALPHA) 42 | self.surface = self.surface.convert_alpha() 43 | 44 | class guiPred(): 45 | def __init__(self, xi = 1, yi = 1, zi = 30, ai = 0, pred_id = 0): 46 | # Take initial (x, y, z, yaw, drone_id) as argument 47 | self.x = xi 48 | self.y = yi 49 | self.z = zi 50 | self.a = ai 51 | self.pred_id = pred_id 52 | self.name = "predator" + str(pred_id) 53 | 54 | # Label the drone 55 | self.body_color = BLUE 56 | self.eye_color = RED 57 | self.fs = 32 # font size 58 | self.font = pygame.font.SysFont(pygame.font.get_default_font(), self.fs) 59 | self.text = str(pred_id) 60 | self.label = self.font.render(self.text, True, BLACK) 61 | 62 | # Misc. 
63 | self.eye_size = 0.4 64 | 65 | def setup(self, sx = 25, sy = 25): 66 | # Set up predator's surface 67 | self.sx = sx 68 | self.sy = sy 69 | self.surface = pygame.Surface((self.sx, self.sy), pygame.SRCALPHA) 70 | self.surface = self.surface.convert_alpha() 71 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenario.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # defines scenario upon which the world is built 4 | class BaseScenario(object): 5 | # create elements of the world 6 | def make_world(self): 7 | raise NotImplementedError() 8 | # create initial conditions of the world 9 | def reset_world(self, world): 10 | raise NotImplementedError() 11 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | 4 | 5 | def load(name): 6 | pathname = osp.join(osp.dirname(__file__), name) 7 | return imp.load_source('', pathname) 8 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/endless.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[0,1,0], 19 | [1,0,1], 20 | [0,1,0]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | def can_observe_predator(self): 27 | shape = np.shape(self._obs[:,:,0]) 28 | obs_size = shape[0]*shape[1] 29 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 30 | ret = np.shape(np.where(obs == 3))[1] > 0 31 | return ret 32 | 33 | def can_observe_two_predator(self): 34 | shape = np.shape(self._obs[:,:,0]) 35 | obs_size = shape[0]*shape[1] 36 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 37 | ret = np.shape(np.where(obs == 3))[1] > 1 38 | return ret 39 | 40 | class Predator(Agent): 41 | def __init__(self): 42 | super(Predator, self).__init__("predator", "blue") 43 | self._obs = deque(maxlen=FLAGS.history_len) 44 | self.obs_range = 1 45 | 46 | def can_observe_prey(self): 47 | shape = np.shape(self._obs) 48 | obs_size = shape[1]*shape[2] 49 | obs = np.reshape(self._obs, obs_size) 50 | ret = np.shape(np.where(obs == 4))[1] > 0 51 | return ret 52 | 53 | def update_obs(self, obs): 54 | self._obs.append(obs[:,:,0]) # use only the first channel 55 | 56 | def fill_obs(self): 57 | # fill the whole history with the current observation 58 | for i in range(FLAGS.history_len-1): 59 | self._obs.append(self._obs[-1]) 60 | 61 | class Scenario(BaseScenario): 62 | def __init__(self): 63 | self.prey_captured = False 64 | 65 | def make_world(self): 66 | world = World(width=map_size, height=map_size) 67 | 68 | agents = [] 69 | self.atype_to_idx = { 70 | "predator": [], 71 | "prey": [] 72 | } 73 | 74 | # add predators 75 | for i in xrange(n_predator): 76 | agents.append(Predator()) 77 
| self.atype_to_idx["predator"].append(i) 78 | 79 | # add preys 80 | for i in xrange(n_prey): 81 | agents.append(Prey()) 82 | self.atype_to_idx["prey"].append(n_predator + i) 83 | 84 | world.agents = agents 85 | for i, agent in enumerate(world.agents): 86 | agent.id = i + 1 87 | agent.silent = True 88 | 89 | # make initial conditions 90 | self.reset_world(world) 91 | return world 92 | 93 | def reset_world(self, world): 94 | world.empty_grid() 95 | 96 | # randomly place agent 97 | for agent in world.agents: 98 | world.placeObj(agent) 99 | 100 | world.set_observations() 101 | 102 | # fill the history with current observation 103 | for i in self.atype_to_idx["predator"]: 104 | world.agents[i].fill_obs() 105 | 106 | self.prey_captured = False 107 | 108 | def reward(self, agent, world): 109 | if agent.itype == "predator": 110 | # if self.prey_captured: 111 | # # return max(10 - world.step_cnt, 0) 112 | # return 1 113 | # else: 114 | # reward = -0.01 115 | # for i in self.atype_to_idx["prey"]: 116 | # prey = world.agents[i] 117 | # if prey.cannot_move(): 118 | # reward = 1 119 | # world.resetObj(prey) 120 | # return reward 121 | # # kdw - Use this for large map size 122 | # # if agent.can_observe_prey(): 123 | # # reward = 0.0 124 | # return reward 125 | reward = -0.001 126 | 127 | for i in self.atype_to_idx["prey"]: 128 | prey = world.agents[i] 129 | if prey.can_observe_predator(): 130 | #world.resetObj(prey) 131 | reward += 0.1/FLAGS.n_predator 132 | return reward 133 | 134 | else: # if prey 135 | if agent.cannot_move(): 136 | return -1 137 | 138 | return 0 139 | 140 | def observation(self, agent, world): 141 | # print agent.get_obs.shape 142 | obs = np.array(agent.get_obs()).flatten() 143 | return obs 144 | 145 | def done(self, agent, world): 146 | if agent.itype == "prey": 147 | if agent.can_observe_predator(): 148 | world.resetObj(agent) 149 | return False 150 | #return self.prey_captured -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/endless2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[1,1,1], 19 | [1,0,1], 20 | [1,1,1]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | def can_observe_predator(self): 27 | shape = np.shape(self._obs[:,:,0]) 28 | obs_size = shape[0]*shape[1] 29 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 30 | ret = np.shape(np.where(obs == 3))[1] > 0 31 | return ret 32 | 33 | def can_observe_two_predator(self): 34 | shape = np.shape(self._obs[:,:,0]) 35 | obs_size = shape[0]*shape[1] 36 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 37 | ret = np.shape(np.where(obs == 3))[1] > 1 38 | return ret 39 | 40 | class Predator(Agent): 41 | def __init__(self): 42 | super(Predator, self).__init__("predator", "blue") 43 | self._obs = deque(maxlen=FLAGS.history_len) 44 | self.obs_range = 1 45 | 46 | def can_observe_prey(self): 47 | shape = np.shape(self._obs) 48 | 
obs_size = shape[1]*shape[2] 49 | obs = np.reshape(self._obs, obs_size) 50 | ret = np.shape(np.where(obs == 4))[1] > 0 51 | return ret 52 | 53 | def update_obs(self, obs): 54 | self._obs.append(obs[:,:,0]) # use only the first channel 55 | 56 | def fill_obs(self): 57 | # fill the whole history with the current observation 58 | for i in range(FLAGS.history_len-1): 59 | self._obs.append(self._obs[-1]) 60 | 61 | class Scenario(BaseScenario): 62 | def __init__(self): 63 | self.prey_captured = False 64 | 65 | def make_world(self): 66 | world = World(width=map_size, height=map_size) 67 | 68 | agents = [] 69 | self.atype_to_idx = { 70 | "predator": [], 71 | "prey": [] 72 | } 73 | 74 | # add predators 75 | for i in xrange(n_predator): 76 | agents.append(Predator()) 77 | self.atype_to_idx["predator"].append(i) 78 | 79 | # add preys 80 | for i in xrange(n_prey): 81 | agents.append(Prey()) 82 | self.atype_to_idx["prey"].append(n_predator + i) 83 | 84 | world.agents = agents 85 | for i, agent in enumerate(world.agents): 86 | agent.id = i + 1 87 | agent.silent = True 88 | 89 | # make initial conditions 90 | self.reset_world(world) 91 | return world 92 | 93 | def reset_world(self, world): 94 | world.empty_grid() 95 | 96 | # randomly place agent 97 | for agent in world.agents: 98 | world.placeObj(agent) 99 | 100 | world.set_observations() 101 | 102 | # fill the history with current observation 103 | for i in self.atype_to_idx["predator"]: 104 | world.agents[i].fill_obs() 105 | 106 | self.prey_captured = False 107 | 108 | def reward(self, agent, world): 109 | if agent.itype == "predator": 110 | # if self.prey_captured: 111 | # # return max(10 - world.step_cnt, 0) 112 | # return 1 113 | # else: 114 | # reward = -0.01 115 | # for i in self.atype_to_idx["prey"]: 116 | # prey = world.agents[i] 117 | # if prey.cannot_move(): 118 | # reward = 1 119 | # world.resetObj(prey) 120 | # return reward 121 | # # kdw - Use this for large map size 122 | # # if agent.can_observe_prey(): 123 | # # reward = 0.0 124 | # return reward 125 | reward = -0.01/FLAGS.n_predator 126 | 127 | for i in self.atype_to_idx["prey"]: 128 | prey = world.agents[i] 129 | if prey.can_observe_two_predator(): 130 | #world.resetObj(prey) 131 | reward += 1.0/FLAGS.n_predator 132 | return reward 133 | 134 | else: # if prey 135 | if agent.cannot_move(): 136 | return -1 137 | 138 | return 0 139 | 140 | def observation(self, agent, world): 141 | # print agent.get_obs.shape 142 | obs = np.array(agent.get_obs()).flatten() 143 | return obs 144 | 145 | def done(self, agent, world): 146 | if agent.itype == "prey": 147 | if agent.can_observe_predator(): 148 | world.resetObj(agent) 149 | return False 150 | #return self.prey_captured -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/endless3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | n_prey1 = FLAGS.n_prey1 13 | n_prey2 = FLAGS.n_prey2 14 | map_size = FLAGS.map_size 15 | penalty = FLAGS.penalty 16 | 17 | class Prey(Agent): 18 | def __init__(self): 19 | super(Prey, self).__init__("prey", "green") 20 | self._movement_mask = np.array( 21 | [[0,1,0], 22 | [1,0,1], 23 | [0,1,0]], dtype=np.int8) 24 | 25 | def 
cannot_move(self): 26 | minimap = (self._obs[:,:,0] != 0) 27 | return np.sum(minimap*self._movement_mask)==4 28 | 29 | def can_observe_predator(self): 30 | shape = np.shape(self._obs[:,:,0]) 31 | obs_size = shape[0]*shape[1] 32 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 33 | ret = np.shape(np.where(obs == 3))[1] > 0 34 | return ret 35 | 36 | def can_observe_two_predator(self): 37 | shape = np.shape(self._obs[:,:,0]) 38 | obs_size = shape[0]*shape[1] 39 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 40 | ret = np.shape(np.where(obs == 3))[1] > 1 41 | return ret 42 | 43 | def can_observe_three_predator(self): 44 | shape = np.shape(self._obs[:,:,0]) 45 | obs_size = shape[0]*shape[1] 46 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 47 | ret = np.shape(np.where(obs == 3))[1] > 2 48 | return ret 49 | 50 | class Prey2(Agent): 51 | def __init__(self): 52 | super(Prey2, self).__init__("prey2", "red") 53 | self._movement_mask = np.array( 54 | [[0,1,0], 55 | [1,0,1], 56 | [0,1,0]], dtype=np.int8) 57 | 58 | def cannot_move(self): 59 | minimap = (self._obs[:,:,0] != 0) 60 | return np.sum(minimap*self._movement_mask)==4 61 | 62 | def can_observe_predator(self): 63 | shape = np.shape(self._obs[:,:,0]) 64 | obs_size = shape[0]*shape[1] 65 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 66 | ret = np.shape(np.where(obs == 3))[1] > 0 67 | return ret 68 | 69 | def can_observe_two_predator(self): 70 | shape = np.shape(self._obs[:,:,0]) 71 | obs_size = shape[0]*shape[1] 72 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 73 | ret = np.shape(np.where(obs == 3))[1] > 1 74 | return ret 75 | 76 | def can_observe_three_predator(self): 77 | shape = np.shape(self._obs[:,:,0]) 78 | obs_size = shape[0]*shape[1] 79 | obs = np.reshape(self._obs[:,:,0] *self._movement_mask, obs_size) 80 | ret = np.shape(np.where(obs == 3))[1] > 2 81 | return ret 82 | 83 | class Predator(Agent): 84 | def __init__(self): 85 | super(Predator, self).__init__("predator", "blue") 86 | self._obs = deque(maxlen=FLAGS.history_len) 87 | self.obs_range = 1 88 | 89 | def can_observe_prey(self): 90 | shape = np.shape(self._obs) 91 | obs_size = shape[1]*shape[2] 92 | obs = np.reshape(self._obs, obs_size) 93 | ret = np.shape(np.where(obs > 3))[1] > 0 94 | return ret 95 | 96 | def update_obs(self, obs): 97 | self._obs.append(obs[:,:,0]) # use only the first channel 98 | 99 | def fill_obs(self): 100 | # fill the whole history with the current observation 101 | for i in range(FLAGS.history_len-1): 102 | self._obs.append(self._obs[-1]) 103 | 104 | class Scenario(BaseScenario): 105 | def __init__(self): 106 | self.prey_captured = False 107 | 108 | def make_world(self): 109 | world = World(width=map_size, height=map_size) 110 | 111 | agents = [] 112 | self.atype_to_idx = { 113 | "predator": [], 114 | "prey": [], 115 | "prey2": [] 116 | } 117 | 118 | # add predators 119 | for i in xrange(n_predator): 120 | agents.append(Predator()) 121 | self.atype_to_idx["predator"].append(i) 122 | 123 | # add preys 124 | for i in xrange(n_prey1): 125 | agents.append(Prey()) 126 | self.atype_to_idx["prey"].append(n_predator + i) 127 | 128 | for i in xrange(n_prey2): 129 | agents.append(Prey2()) 130 | self.atype_to_idx["prey2"].append(n_predator + n_prey1 + i) 131 | 132 | world.agents = agents 133 | for i, agent in enumerate(world.agents): 134 | agent.id = i + 1 135 | agent.silent = True 136 | 137 | # make initial conditions 138 | self.reset_world(world) 139 | return 
world 140 | 141 | def reset_world(self, world): 142 | world.empty_grid() 143 | 144 | # randomly place agent 145 | for agent in world.agents: 146 | world.placeObj(agent) 147 | 148 | world.set_observations() 149 | 150 | # fill the history with current observation 151 | for i in self.atype_to_idx["predator"]: 152 | world.agents[i].fill_obs() 153 | 154 | self.prey_captured = False 155 | 156 | def reward(self, agent, world): 157 | if agent.itype == "predator": 158 | reward = 0. 159 | count = 0 160 | for i in self.atype_to_idx["prey"]: 161 | # reward += -0.01 162 | prey = world.agents[i] 163 | # if prey.can_observe_three_predator(): 164 | # reward += 10.0 165 | if prey.can_observe_predator(): 166 | reward += +1.0 167 | # print "WIN" 168 | # print "CATCH" 169 | # elif prey.can_observe_predator(): 170 | # # print "LOSE" 171 | # # reward += 0. 172 | # reward += +penalty/10. 173 | # if penalty > 10: 174 | # reward += (penalty-10)/10. 175 | # # else: 176 | # reward += +1. 177 | # if prey.can_observe_predator(): 178 | # count += 1 179 | for i in self.atype_to_idx["prey2"]: 180 | # reward += -0.01 181 | prey = world.agents[i] 182 | # if prey.can_observe_three_predator(): 183 | # reward += 10.0 184 | if prey.can_observe_two_predator(): 185 | reward += 1.0 186 | # print "WIN" 187 | # print "CATCH" 188 | elif prey.can_observe_predator(): 189 | # print "LOSE" 190 | # reward += 0. 191 | reward += -penalty/10. 192 | # if reward > 1: 193 | # print "CATCH" 194 | # if count > 1: 195 | # reward += 1.0 196 | # elif count == 1: 197 | # reward += -penalty/10. 198 | # else: 199 | # reward += 0. 200 | 201 | 202 | return reward/(n_predator) 203 | 204 | else: # if prey 205 | if agent.cannot_move(): 206 | return 0 207 | 208 | return 0 209 | 210 | def observation(self, agent, world): 211 | # print agent.get_obs.shape 212 | obs = np.array(agent.get_obs()).flatten() 213 | return obs 214 | 215 | def done(self, agent, world): 216 | if agent.itype == "prey": 217 | if agent.can_observe_predator(): 218 | # world.resetObj(agent) 219 | return True 220 | if agent.itype == "prey2": 221 | if agent.can_observe_two_predator(): 222 | # world.resetObj(agent) 223 | return True 224 | # if agent.itype == "predator": 225 | # if agent.can_observe_prey(): 226 | # # world.resetObj(agent) 227 | # return True 228 | return False 229 | #return self.prey_captured 230 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/pursuit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | from envs.grid_core import World 4 | from envs.grid_core import CoreAgent as Agent 5 | from envs.scenario import BaseScenario 6 | import config 7 | 8 | FLAGS = config.flags.FLAGS 9 | 10 | n_predator = FLAGS.n_predator 11 | n_prey = FLAGS.n_prey 12 | map_size = FLAGS.map_size 13 | 14 | class Prey(Agent): 15 | def __init__(self): 16 | super(Prey, self).__init__("prey", "green") 17 | self._movement_mask = np.array( 18 | [[0,1,0], 19 | [1,0,1], 20 | [0,1,0]], dtype=np.int8) 21 | 22 | def cannot_move(self): 23 | minimap = (self._obs[:,:,0] != 0) 24 | return np.sum(minimap*self._movement_mask)==4 25 | 26 | class Predator(Agent): 27 | def __init__(self): 28 | super(Predator, self).__init__("predator", "blue") 29 | self._obs = deque(maxlen=FLAGS.history_len) 30 | self.obs_range = 1 31 | 32 | def can_observe_prey(self): 33 | shape = np.shape(self._obs) 34 | obs_size = shape[1]*shape[2] 35 | obs = np.reshape(self._obs, obs_size) 36 | 
ret = np.shape(np.where(obs == 4))[1] > 0 37 | return ret 38 | 39 | def update_obs(self, obs): 40 | self._obs.append(obs[:,:,0]) # use only the first channel 41 | 42 | def fill_obs(self): 43 | # fill the whole history with the current observation 44 | for i in range(FLAGS.history_len-1): 45 | self._obs.append(self._obs[-1]) 46 | 47 | class Scenario(BaseScenario): 48 | def __init__(self): 49 | self.prey_captured = False 50 | 51 | def make_world(self): 52 | world = World(width=map_size, height=map_size) 53 | 54 | agents = [] 55 | self.atype_to_idx = { 56 | "predator": [], 57 | "prey": [] 58 | } 59 | 60 | # add predators 61 | for i in xrange(n_predator): 62 | agents.append(Predator()) 63 | self.atype_to_idx["predator"].append(i) 64 | 65 | # add preys 66 | for i in xrange(n_prey): 67 | agents.append(Prey()) 68 | self.atype_to_idx["prey"].append(n_predator + i) 69 | 70 | world.agents = agents 71 | for i, agent in enumerate(world.agents): 72 | agent.id = i + 1 73 | agent.silent = True 74 | 75 | # make initial conditions 76 | self.reset_world(world) 77 | return world 78 | 79 | def reset_world(self, world): 80 | world.empty_grid() 81 | 82 | # randomly place agent 83 | for agent in world.agents: 84 | world.placeObj(agent) 85 | 86 | world.set_observations() 87 | 88 | # fill the history with current observation 89 | for i in self.atype_to_idx["predator"]: 90 | world.agents[i].fill_obs() 91 | 92 | self.prey_captured = False 93 | 94 | def reward(self, agent, world): 95 | if agent.itype == "predator": 96 | if self.prey_captured: 97 | # return max(10 - world.step_cnt, 0) 98 | return 1 99 | else: 100 | reward = -0.01 101 | for i in self.atype_to_idx["prey"]: 102 | prey = world.agents[i] 103 | if prey.cannot_move(): 104 | # print "captured" 105 | self.prey_captured = True 106 | reward = 1 107 | return reward 108 | # kdw - Use this for large map size 109 | # if agent.can_observe_prey(): 110 | # reward = 0.0 111 | return reward 112 | else: # if prey 113 | if agent.cannot_move(): 114 | return -1 115 | 116 | return 0 117 | 118 | def observation(self, agent, world): 119 | # print agent.get_obs.shape 120 | obs = np.array(agent.get_obs()).flatten() 121 | return obs 122 | 123 | def done(self, agent, world): 124 | return self.prey_captured -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/single_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from envs.scenarios.pursuit import Scenario as BaseScenario 3 | import config 4 | 5 | FLAGS = config.flags.FLAGS 6 | map_size = FLAGS.map_size 7 | 8 | class Scenario(BaseScenario): 9 | def __init__(self): 10 | super(Scenario, self).__init__() 11 | print "Single agent scenario" 12 | 13 | def reset_world(self, world): 14 | world.empty_grid() 15 | 16 | prey_pos = [0, 0] 17 | 18 | prey_idx = self.atype_to_idx["prey"][0] 19 | world.placeObj(world.agents[prey_idx], top=prey_pos, size=(1,1)) 20 | 21 | top = ((prey_pos[0]+1)%map_size, (prey_pos[1]+1)%map_size) 22 | 23 | world.placeObj(world.agents[0], top=top, size=(2, 2)) 24 | world.placeObj(world.agents[1], top=[0, 1], size=(1, 1)) 25 | 26 | world.set_observations() 27 | 28 | # fill the history with current observation 29 | for i in self.atype_to_idx["predator"]: 30 | world.agents[i].fill_obs() 31 | 32 | self.prey_captured = False 33 | -------------------------------------------------------------------------------- /Predator-Prey/envs/scenarios/static_prey.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from envs.scenarios.pursuit import Scenario as BaseScenario 3 | import config 4 | 5 | FLAGS = config.flags.FLAGS 6 | map_size = FLAGS.map_size 7 | 8 | class Scenario(BaseScenario): 9 | def __init__(self): 10 | super(Scenario, self).__init__() 11 | 12 | def reset_world(self, world): 13 | world.empty_grid() 14 | 15 | # prey_pos = np.random.choice([map_size - 1, 0], 2) 16 | prey_pos = [0, 0] 17 | prey_idx = self.atype_to_idx["prey"][0] 18 | world.placeObj(world.agents[prey_idx], top=prey_pos, size=(1,1)) 19 | 20 | top = ((prey_pos[0]+1)%map_size, (prey_pos[1]+1)%map_size) 21 | for idx in self.atype_to_idx["predator"]: 22 | world.placeObj(world.agents[idx], top=top, size=(2,2)) 23 | 24 | world.set_observations() 25 | 26 | # fill the history with current observation 27 | for i in self.atype_to_idx["predator"]: 28 | world.agents[i].fill_obs() 29 | 30 | self.prey_captured = False 31 | -------------------------------------------------------------------------------- /Predator-Prey/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | import logging 4 | import make_env 5 | import agents 6 | import config 7 | import time 8 | import random 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | FLAGS = config.flags.FLAGS 13 | 14 | def set_seed(seed): 15 | """Initialized the random seeds 16 | """ 17 | random.seed(seed) 18 | np.random.seed(seed) 19 | tf.set_random_seed(seed) 20 | return None 21 | 22 | 23 | if __name__ == '__main__': 24 | 25 | seed = FLAGS.seed 26 | set_seed(seed) 27 | print 'SEED', seed 28 | 29 | # === Logging setup === # 30 | logger_env = logging.getLogger('GridMARL') 31 | logger_agent = logging.getLogger('Agent') 32 | 33 | # === Program start === # 34 | # Load environment 35 | env = make_env.make_env(FLAGS.scenario) 36 | logger_env.info('GridMARL Start with %d predator(s) and %d prey(s)', FLAGS.n_predator, FLAGS.n_prey) 37 | 38 | # Load trainer 39 | logger_agent.info('Agent: {}'.format(FLAGS.agent)) 40 | trainer = agents.load(FLAGS.agent+"/trainer.py").Trainer(env) 41 | 42 | print FLAGS.agent, config.file_name 43 | 44 | # start learning 45 | if FLAGS.train: 46 | start_time = time.time() 47 | trainer.learn() 48 | finish_time = time.time() 49 | # trainer.test() 50 | print "TRAINING TIME (sec)", finish_time - start_time 51 | else: 52 | trainer.test() 53 | -------------------------------------------------------------------------------- /Predator-Prey/make_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for creating a multiagent environment with one of the scenarios listed 3 | in ./scenarios/. 4 | Can be called by using, for example: 5 | env = make_env('simple_speaker_listener') 6 | After producing the env object, can be used similarly to an OpenAI gym 7 | environment. 8 | 9 | A policy using this environment must output actions in the form of a list 10 | for all agents. Each element of the list should be a numpy array, 11 | of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede 12 | communication actions in this array. See environment.py for more details. 13 | """ 14 | 15 | def make_env(scenario_name, benchmark=False): 16 | ''' 17 | Creates a MultiAgentEnv object as env. This can be used similar to a gym 18 | environment by calling env.reset() and env.step(). 19 | Use env.render() to view the environment on the screen. 
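    A minimal usage sketch (a sketch only: the scenario modules read their
    settings from config.py at import time, so the command-line flags must be
    set up first, as in main.py or test.py; 'pursuit' is one of the scenarios
    shipped in ./scenarios/):

        env = make_env('pursuit')
        obs_n = env.reset()
        # one action per agent; action 2 keeps an agent in place in the grid world
        obs_n, reward_n, done_n, info_n = env.step([2] * len(env.world.agents))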
20 | 21 | Input: 22 | scenario_name : name of the scenario from ./scenarios/ to be Returns 23 | (without the .py extension) 24 | benchmark : whether you want to produce benchmarking data 25 | (usually only done during evaluation) 26 | 27 | Some useful env properties (see environment.py): 28 | .observation_space : Returns the observation space for each agent 29 | .action_space : Returns the action space for each agent 30 | .n : Returns the number of Agents 31 | ''' 32 | from envs.environment import MultiAgentEnv 33 | import envs.scenarios as scenarios 34 | 35 | # load scenario from script 36 | scenario = scenarios.load(scenario_name + ".py").Scenario() 37 | # create world 38 | world = scenario.make_world() 39 | # create multiagent environment 40 | if benchmark: 41 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, scenario.benchmark_data) 42 | else: 43 | env = MultiAgentEnv(world, reset_callback=scenario.reset_world, 44 | reward_callback=scenario.reward, 45 | observation_callback=scenario.observation, 46 | done_callback=scenario.done) 47 | return env 48 | -------------------------------------------------------------------------------- /Predator-Prey/readme: -------------------------------------------------------------------------------- 1 | Training 2 | 3 | $algorithm = vdn, qmix, pqmix5(=QTRAN-alt in the paper), pqmix7(=QTRAN in the paper) 4 | 5 | (i) 2 Predator & 1 Prey (5X5 Map) with P=0.5 6 | 7 | python main.py --scenario endless3 --n_predator 2 --n_prey1 0 --n_prey2 1 --n_prey 1 --map_size 5 --agent pos_cac_fo --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 600000 --df 0.99 --eval_step 100 --algorithm $algorithm --lr 0.0005 --seed 0 --penalty 5 --comment 215 8 | 9 | (ii) 4 Predator & 2 Prey (7X7 Map) with P=0.5 10 | 11 | python main.py --scenario endless3 --n_predator 4 --n_prey1 0 --n_prey2 2 --n_prey 2 --map_size 7 --agent pos_cac_fo --training_step 6000000 --testing_step 10000 --max_step 100 --b_size 1000000 --df 0.99 --eval_step 100 --algorithm $algorithm --lr 0.0005 --seed 0 --penalty 5 --comment 427 & 12 | 13 | 14 | -------------------------------------------------------------------------------- /Predator-Prey/run_DQN9.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | GPU=$1 4 | #for penalty in 0 2 4 6 8 10 12 14 5 | for seed in 28 29 30 31 32 #12 13 6 | #for penalty in 5 10 15 7 | do 8 | for penalty in 5 9 | do 10 | 11 | #CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 2 --n_prey1 0 --n_prey2 1 --n_prey 1 --map_size 5 --agent $2 --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 600000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0005 --seed $seed --penalty $penalty --comment "$4"215 & 12 | 13 | 14 | 15 | 16 | 17 | 18 | #for seed in 401 402 403 404 405 19 | #do 20 | 21 | #CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 3 --n_prey1 0 --n_prey2 2 --n_prey 2 --map_size 6 --agent $2 --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 600000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0005 --seed $seed --penalty $penalty --comment "$4"326 & 22 | 23 | CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 4 --n_prey1 0 --n_prey2 2 --n_prey 2 --map_size 7 --agent $2 --training_step 6000000 --testing_step 10000 --max_step 100 --b_size 1000000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0005 --seed $seed --penalty $penalty --comment "$4"427 & 24 | 25 | 26 | # 
CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 3 --n_prey 2 --map_size 7 --agent $2 --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 500000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0001 --seed $seed --penalty $penalty --beta $4 --comment "$4"-326 & 27 | 28 | #CUDA_VISIBLE_DEVICES=$1 python main.py --scenario endless3 --n_predator 4 --n_prey 2 --map_size 8 --agent $2 --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 500000 --df 0.99 --eval_step 100 --algorithm $3 --lr 0.0001 --seed $seed --penalty $penalty --comment "$4"428 & 29 | 30 | #done 31 | 32 | done 33 | done 34 | -------------------------------------------------------------------------------- /Predator-Prey/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os,sys 3 | sys.path.insert(1, os.path.join(sys.path[0], '..')) 4 | import argparse 5 | 6 | from envs.environment import MultiAgentEnv 7 | import envs.scenarios as scenarios 8 | import numpy as np 9 | import config 10 | 11 | FLAGS = config.flags.FLAGS 12 | 13 | 14 | if __name__ == '__main__': 15 | # parse arguments 16 | parser = argparse.ArgumentParser(description=None) 17 | parser.add_argument('-s', '--scenario', default='pursuit.py', help='Path of the scenario Python script.') 18 | args = parser.parse_args() 19 | 20 | # load scenario from script 21 | scenario = scenarios.load(args.scenario).Scenario() 22 | # create world 23 | world = scenario.make_world() 24 | # create multiagent environment 25 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, done_callback=scenario.done) 26 | act_n = [2, 2] 27 | print "action space:", env.action_space[0].n 28 | print "observation space:", env.observation_space 29 | 30 | obs_n = env.reset()[:2] 31 | print env.get_agent_profile() 32 | print env.get_full_encoding()[:, :, 2] 33 | imap = np.array(obs_n).reshape((2, FLAGS.history_len,3,3,1)) 34 | 35 | minimap = imap[:,:,:,:,0] 36 | print minimap[0, -1] 37 | print minimap[1, -1] 38 | 39 | while True: 40 | a0 = input("action of agent 0:") 41 | a1 = input("action of agent 1:") 42 | act_n = [a0, a1, 2] 43 | obs_n, reward_n, done_n, info_n = env.step(act_n) 44 | obs_n = obs_n[:2] 45 | 46 | 47 | print env.get_full_encoding()[:,:,2] 48 | imap = np.array(obs_n).reshape((2, FLAGS.history_len,3,3,1)) 49 | 50 | minimap = imap[:,:,:,:,0] 51 | print minimap[0, -1] 52 | print minimap[1, -1] 53 | 54 | 55 | print reward_n, done_n 56 | 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # QTRAN: Learning to Factorize with Transformation for Cooperative Multi-Agent Reinforcement Learning 2 | 3 | There will be additional updates later 4 | 5 | ## Predator-prey 6 | 7 | Training 8 | 9 | $algorithm = vdn, qmix, pqmix5(=QTRAN-alt in the paper), pqmix7(=QTRAN in the paper) 10 | 11 | (i) 2 Predator & 1 Prey (5X5 Map) with P=0.5 12 | 13 | python main.py --scenario endless3 --n_predator 2 --n_prey1 0 --n_prey2 1 --n_prey 1 --map_size 5 --agent pos_cac_fo --training_step 3000000 --testing_step 10000 --max_step 100 --b_size 600000 --df 0.99 --eval_step 100 --algorithm $algorithm --lr 0.0005 --seed 0 --penalty 5 --comment 215 14 | 15 | (ii) 4 Predator & 2 Prey (7X7 Map) with P=0.5 16 | 17 | python main.py --scenario endless3 --n_predator 4 --n_prey1 0 --n_prey2 2 --n_prey 2 --map_size 7 --agent pos_cac_fo 
--training_step 6000000 --testing_step 10000 --max_step 100 --b_size 1000000 --df 0.99 --eval_step 100 --algorithm $algorithm --lr 0.0005 --seed 0 --penalty 5 --comment 427 & 18 | 19 | 20 | ## Others 21 | 22 | Training 23 | 24 | $algorithm = vdn, qmix, pqmix5(=QTRAN-alt in the paper), pqmix7(=QTRAN in the paper) 25 | 26 | python main.py --agent pos_cac_fo --training_step 10000 --b_size 10000 --m_size 32 --seed 0 --algorithm $algorithm --penalty 0 27 | 28 | 29 | In make_env.py 30 | 31 | (i) Matrix game 32 | 33 | from envs.environment import MultiAgentSimpleEnv2 as MAS 34 | 35 | (ii) Gaussian Squeeze 36 | 37 | from envs.environment import MultiAgentSimpleEnv4 as MAS 38 | --------------------------------------------------------------------------------
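A note on the --penalty flag used in the Predator-Prey commands above: in envs/scenarios/endless3.py it enters the predator reward as reward += -penalty/10., and the accumulated reward is divided by n_predator before being returned, which is presumably how --penalty 5 realises the P=0.5 setting mentioned above. A small illustrative calculation (hypothetical numbers, not taken from the repository):

    # Per-predator reward around a single "prey2" in endless3, following Scenario.reward()
    n_predator = 2
    penalty = 5                                  # --penalty 5  ->  P = penalty / 10. = 0.5
    lone_catch = (-penalty / 10.) / n_predator   # only one predator adjacent: -0.25 each
    joint_catch = 1.0 / n_predator               # both predators adjacent:    +0.5 each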