├── README.md
├── agents
    ├── __pycache__
    │   ├── __init__.cpython-37.pyc
    │   ├── models.cpython-37.pyc
    │   ├── policies.cpython-37.pyc
    │   └── utils.cpython-37.pyc
    ├── models.py
    ├── policies.py
    └── utils.py
├── config
    ├── config_greedy.ini
    ├── config_ia2c.ini
    ├── config_ia2c_cu.ini
    ├── config_ia2c_fp.ini
    ├── config_ma2c_dial.ini
    ├── config_ma2c_ic3.ini
    └── config_ma2c_nc.ini
├── envs
    ├── __pycache__
    │   ├── __init__.cpython-37.pyc
    │   ├── env.cpython-37.pyc
    │   └── large_grid_env.cpython-37.pyc
    ├── data
    │   ├── __pycache__
    │   │   ├── __init__.cpython-37.pyc
    │   │   └── build_file.cpython-37.pyc
    │   ├── build_file.py
    │   ├── intersection.pdf
    │   ├── network.pdf
    │   └── view.xml
    ├── env.py
    └── large_grid_env.py
├── main.py
└── utils.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Networked Multi-agent Deep RL
 2 | This repo implements the state-of-the-art methods for deep RL in a networked multi-agent system, with observability and communication of each agent limited to its neighborhood. For fair comparison, all methods are applied to A2C agents.
 3 | Under construction ...
 4 | 
 5 | Available IA2C algorithms:
 6 | * PolicyInferring: [Lowe, Ryan, et al. "Multi-agent actor-critic for mixed cooperative-competitive environments." Advances in Neural Information Processing Systems, 2017.](https://papers.nips.cc/paper/7217-multi-agent-actor-critic-for-mixed-cooperative-competitive-environments.pdf)
 7 | * FingerPrint: [Foerster, Jakob, et al. "Stabilising experience replay for deep multi-agent reinforcement learning." arXiv preprint arXiv:1702.08887, 2017.](https://arxiv.org/pdf/1702.08887.pdf)
 8 | * ConsensusUpdate: [Zhang, Kaiqing, et al. "Fully decentralized multi-agent reinforcement learning with networked agents." arXiv preprint arXiv:1802.08757, 2018.](https://arxiv.org/pdf/1802.08757.pdf)
 9 | 
10 | 
11 | Available MA2C algorithms:
12 | * DIAL: [Foerster, Jakob, et al. "Learning to communicate with deep multi-agent reinforcement learning." Advances in Neural Information Processing Systems. 2016.](http://papers.nips.cc/paper/6042-learning-to-communicate-with-deep-multi-agent-reinforcement-learning.pdf)
13 | * CommNet: [Sukhbaatar, Sainbayar, et al. "Learning multiagent communication with backpropagation." Advances in Neural Information Processing Systems, 2016.](https://arxiv.org/pdf/1605.07736.pdf)
14 | * NeurComm: [Gilmer, Justin, et al. "Neural message passing for quantum chemistry." arXiv preprint arXiv:1704.01212, 2017.](https://arxiv.org/pdf/1704.01212.pdf)
15 | 
16 | ## Requirements
17 | * Python3
18 | * [Tensorflow](http://www.tensorflow.org/install)
19 | * [SUMO](http://sumo.dlr.de/wiki/Installing)
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/agents/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/agents/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/agents/__pycache__/models.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/agents/__pycache__/models.cpython-37.pyc


--------------------------------------------------------------------------------
/agents/__pycache__/policies.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/agents/__pycache__/policies.cpython-37.pyc


--------------------------------------------------------------------------------
/agents/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/agents/__pycache__/utils.cpython-37.pyc


--------------------------------------------------------------------------------
/agents/models.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import os
  3 | from agents.utils import OnPolicyBuffer, MultiAgentOnPolicyBuffer, Scheduler
  4 | from agents.policies import (LstmPolicy, FPPolicy, ConsensusPolicy, NCMultiAgentPolicy,
  5 |                              IC3MultiAgentPolicy, DIALMultiAgentPolicy)
  6 | import logging
  7 | import numpy as np
  8 | import tensorflow as tf
  9 | 
 10 | 
 11 | class IA2C:
 12 |     """
 13 |     The basic IA2C implementation with decentralized actor and centralized critic,
 14 |     limited to neighborhood area only.
 15 |     """
 16 |     def __init__(self, n_s_ls, n_a, neighbor_mask, distance_mask, coop_gamma,
 17 |                  total_step, model_config, seed=0):
 18 |         self.name = 'ia2c'
 19 |         self._init_algo(n_s_ls, n_a, neighbor_mask, distance_mask, coop_gamma,
 20 |                         total_step, seed, model_config)
 21 | 
 22 |     def add_transition(self, ob, naction, action, reward, value, done):
 23 |         if self.reward_norm > 0:
 24 |             reward = reward / self.reward_norm
 25 |         if self.reward_clip > 0:
 26 |             reward = np.clip(reward, -self.reward_clip, self.reward_clip)
 27 |         for i in range(self.n_agent):
 28 |             self.trans_buffer[i].add_transition(ob[i], naction[i], action[i], reward, value[i], done)
 29 | 
 30 |     def backward(self, Rends, dt, summary_writer=None, global_step=None):
 31 |         cur_lr = self.lr_scheduler.get(self.n_step)
 32 |         for i in range(self.n_agent):
 33 |             obs, nas, acts, dones, Rs, Advs = self.trans_buffer[i].sample_transition(Rends[i], dt)
 34 |             if i == 0:
 35 |                 self.policy[i].backward(self.sess, obs, nas, acts, dones, Rs, Advs, cur_lr,
 36 |                                         summary_writer=summary_writer, global_step=global_step)
 37 |             else:
 38 |                 self.policy[i].backward(self.sess, obs, nas, acts, dones, Rs, Advs, cur_lr)
 39 | 
 40 |     def forward(self, obs, done, nactions=None, out_type='p'):
 41 |         out = []
 42 |         if nactions is None:
 43 |             nactions = [None] * self.n_agent
 44 |         for i in range(self.n_agent): 
 45 |             cur_out = self.policy[i].forward(self.sess, obs[i], done, nactions[i], out_type)
 46 |             out.append(cur_out)
 47 |         return np.array(out)
 48 | 
 49 |     def load(self, model_dir, checkpoint=None):
 50 |         save_file = None
 51 |         save_step = 0
 52 |         if os.path.exists(model_dir):
 53 |             if checkpoint is None:
 54 |                 for file in os.listdir(model_dir):
 55 |                     if file.startswith('checkpoint'):
 56 |                         prefix = file.split('.')[0]
 57 |                         tokens = prefix.split('-')
 58 |                         if len(tokens) != 2:
 59 |                             continue
 60 |                         cur_step = int(tokens[1])
 61 |                         if cur_step > save_step:
 62 |                             save_file = prefix
 63 |                             save_step = cur_step
 64 |             else:
 65 |                 save_file = 'checkpoint-' + str(int(checkpoint))
 66 |         if save_file is not None:
 67 |             self.saver.restore(self.sess, model_dir + save_file)
 68 |             logging.info('Checkpoint loaded: %s' % save_file)
 69 |             return True
 70 |         logging.error('Can not find old checkpoint for %s' % model_dir)
 71 |         return False
 72 | 
 73 |     def save(self, model_dir, global_step):
 74 |         self.saver.save(self.sess, model_dir + 'checkpoint', global_step=global_step)
 75 | 
 76 |     def _init_algo(self, n_s_ls, n_a, neighbor_mask, distance_mask, coop_gamma,
 77 |                    total_step, seed, model_config):
 78 |         # init params
 79 |         if self.name.startswith('ia2c'):
 80 |             self.n_s_ls = n_s_ls
 81 |         else:
 82 |             self.n_s = n_s_ls
 83 |         self.n_a = n_a
 84 |         self.neighbor_mask = neighbor_mask
 85 |         self.n_agent = len(self.neighbor_mask)
 86 |         self.reward_clip = model_config.getfloat('reward_clip')
 87 |         self.reward_norm = model_config.getfloat('reward_norm')
 88 |         self.n_step = model_config.getint('batch_size')
 89 |         self.n_fc = model_config.getint('num_fc')
 90 |         self.n_lstm = model_config.getint('num_lstm')
 91 |         # init tf
 92 |         tf.reset_default_graph()
 93 |         tf.set_random_seed(seed)
 94 |         config = tf.ConfigProto(allow_soft_placement=True)
 95 |         self.sess = tf.Session(config=config)
 96 |         self.policy = self._init_policy()
 97 |         self.saver = tf.train.Saver(max_to_keep=5)
 98 |         # init exp buffer and lr scheduler for training
 99 |         if total_step:
100 |             self.total_step = total_step
101 |             self._init_train(model_config, distance_mask, coop_gamma)
102 |         self.sess.run(tf.global_variables_initializer())
103 | 
104 |     def _init_policy(self):
105 |         policy = []
106 |         for i in range(self.n_agent):
107 |             n_n = np.sum(self.neighbor_mask[i])
108 |             policy.append(LstmPolicy(self.n_s_ls[i], self.n_a, n_n, self.n_step,
109 |                                      n_fc=self.n_fc, n_lstm=self.n_lstm, name='%d' % i))
110 |         return policy
111 | 
112 |     def _init_scheduler(self, model_config):
113 |         # init lr scheduler
114 |         lr_init = model_config.getfloat('lr_init')
115 |         lr_decay = model_config.get('lr_decay')
116 |         if lr_decay == 'constant':
117 |             self.lr_scheduler = Scheduler(lr_init, decay=lr_decay)
118 |         else:
119 |             lr_min = model_config.getfloat('lr_min')
120 |             self.lr_scheduler = Scheduler(lr_init, lr_min, self.total_step, decay=lr_decay)
121 | 
122 |     def _init_train(self, model_config, distance_mask, coop_gamma):
123 |         # init lr scheduler
124 |         self._init_scheduler(model_config)
125 |         v_coef = model_config.getfloat('value_coef')
126 |         e_coef = model_config.getfloat('entropy_coef')
127 |         max_grad_norm = model_config.getfloat('max_grad_norm')
128 |         alpha = model_config.getfloat('rmsp_alpha')
129 |         epsilon = model_config.getfloat('rmsp_epsilon')
130 |         gamma = model_config.getfloat('gamma')
131 |         self.trans_buffer = []
132 |         for i in range(self.n_agent):
133 |             # init loss
134 |             self.policy[i].prepare_loss(v_coef, e_coef, max_grad_norm, alpha, epsilon)
135 |             # init replay buffer
136 |             self.trans_buffer.append(OnPolicyBuffer(gamma, coop_gamma, distance_mask[i]))
137 | 
138 | 
139 | class IA2C_FP(IA2C):
140 |     """
141 |     In fingerprint IA2C, neighborhood policies (fingerprints) are also included.
142 |     """
143 |     def __init__(self, n_s_ls, n_a, neighbor_mask, distance_mask, coop_gamma,
144 |                  total_step, model_config, seed=0):
145 |         self.name = 'ia2c_fp'
146 |         self._init_algo(n_s_ls, n_a, neighbor_mask, distance_mask, coop_gamma, 
147 |                         total_step, seed, model_config)
148 | 
149 |     def _init_policy(self):
150 |         policy = []
151 |         for i in range(self.n_agent):
152 |             n_n = np.sum(self.neighbor_mask[i])
153 |             # neighborhood policies are included in local state
154 |             n_s1 = self.n_s_ls[i] + self.n_a*n_n
155 |             policy.append(FPPolicy(n_s1, self.n_a, n_n, self.n_step, n_fc=self.n_fc,
156 |                                    n_lstm=self.n_lstm, name='%d' % i))
157 |         return policy
158 | 
159 | 
160 | class MA2C_NC(IA2C):
161 |     def __init__(self, n_s, n_a, neighbor_mask, distance_mask, coop_gamma,
162 |                  total_step, model_config, seed=0):
163 |         self.name = 'ma2c_nc'
164 |         self._init_algo(n_s, n_a, neighbor_mask, distance_mask, coop_gamma,
165 |                         total_step, seed, model_config)
166 | 
167 |     def add_transition(self, ob, p, action, reward, value, done):
168 |         if self.reward_norm > 0:
169 |             reward = reward / self.reward_norm
170 |         if self.reward_clip > 0:
171 |             reward = np.clip(reward, -self.reward_clip, self.reward_clip)
172 |         self.trans_buffer.add_transition(ob, p, action, reward, value, done)
173 | 
174 |     def backward(self, Rends, dt, summary_writer=None, global_step=None):
175 |         cur_lr = self.lr_scheduler.get(self.n_step)
176 |         obs, ps, acts, dones, Rs, Advs = self.trans_buffer.sample_transition(Rends, dt)
177 |         self.policy.backward(self.sess, obs, ps, acts, dones, Rs, Advs, cur_lr,
178 |                              summary_writer=summary_writer, global_step=global_step)
179 | 
180 |     def forward(self, obs, done, ps, actions=None, out_type='p'):
181 |         return self.policy.forward(self.sess, obs, done, ps, actions, out_type)
182 | 
183 |     def _init_policy(self):
184 |         return NCMultiAgentPolicy(self.n_s, self.n_a, self.n_agent, self.n_step,
185 |                                   self.neighbor_mask, n_fc=self.n_fc, n_h=self.n_lstm)
186 | 
187 |     def _init_train(self, model_config, distance_mask, coop_gamma):
188 |         # init lr scheduler
189 |         self._init_scheduler(model_config)
190 |         v_coef = model_config.getfloat('value_coef')
191 |         e_coef = model_config.getfloat('entropy_coef')
192 |         max_grad_norm = model_config.getfloat('max_grad_norm')
193 |         alpha = model_config.getfloat('rmsp_alpha')
194 |         epsilon = model_config.getfloat('rmsp_epsilon')
195 |         gamma = model_config.getfloat('gamma')
196 |         # init loss
197 |         self.policy.prepare_loss(v_coef, e_coef, max_grad_norm, alpha, epsilon)
198 |         # init replay buffer
199 |         self.trans_buffer = MultiAgentOnPolicyBuffer(gamma, coop_gamma, distance_mask)
200 | 
201 | 
202 | class IA2C_CU(MA2C_NC):
203 |     def __init__(self, n_s, n_a, neighbor_mask, distance_mask, coop_gamma,
204 |                  total_step, model_config, seed=0):
205 |         self.name = 'ma2c_cu'
206 |         self._init_algo(n_s, n_a, neighbor_mask, distance_mask, coop_gamma,
207 |                         total_step, seed, model_config)
208 | 
209 |     def _init_policy(self):
210 |         return ConsensusPolicy(self.n_s, self.n_a, self.n_agent, self.n_step,
211 |                                self.neighbor_mask, n_fc=self.n_fc, n_h=self.n_lstm)
212 | 
213 | 
214 | class MA2C_IC3(MA2C_NC):
215 |     def __init__(self, n_s, n_a, neighbor_mask, distance_mask, coop_gamma,
216 |                  total_step, model_config, seed=0):
217 |         self.name = 'ma2c_ic3'
218 |         self._init_algo(n_s, n_a, neighbor_mask, distance_mask, coop_gamma,
219 |                         total_step, seed, model_config)
220 | 
221 |     def _init_policy(self):
222 |         return IC3MultiAgentPolicy(self.n_s, self.n_a, self.n_agent, self.n_step,
223 |                                    self.neighbor_mask, n_fc=self.n_fc, n_h=self.n_lstm)
224 | 
225 | 
226 | class MA2C_DIAL(MA2C_NC):
227 |     def __init__(self, n_s, n_a, neighbor_mask, distance_mask, coop_gamma,
228 |                  total_step, model_config, seed=0):
229 |         self.name = 'ma2c_dial'
230 |         self._init_algo(n_s, n_a, neighbor_mask, distance_mask, coop_gamma,
231 |                         total_step, seed, model_config)
232 | 
233 |     def _init_policy(self):
234 |         return DIALMultiAgentPolicy(self.n_s, self.n_a, self.n_agent, self.n_step,
235 |                                     self.neighbor_mask, n_fc=self.n_fc, n_h=self.n_lstm)
236 | 


--------------------------------------------------------------------------------
/agents/policies.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | from agents.utils import *
  4 | 
  5 | 
  6 | class Policy:
  7 |     def __init__(self, n_a, n_s, n_step, policy_name, agent_name):
  8 |         self.name = policy_name
  9 |         if agent_name is not None:
 10 |             # for multi-agent system
 11 |             self.name += '_' + str(agent_name)
 12 |         self.n_a = n_a
 13 |         self.n_s = n_s
 14 |         self.n_step = n_step
 15 | 
 16 |     def forward(self, ob, *_args, **_kwargs):
 17 |         raise NotImplementedError()
 18 | 
 19 |     def prepare_loss(self, v_coef, e_coef, max_grad_norm, alpha, epsilon):
 20 |         self.A = tf.placeholder(tf.int32, [self.n_step])
 21 |         self.ADV = tf.placeholder(tf.float32, [self.n_step])
 22 |         self.R = tf.placeholder(tf.float32, [self.n_step])
 23 |         A_sparse = tf.one_hot(self.A, self.n_a)
 24 |         log_pi = tf.log(tf.clip_by_value(self.pi, 1e-10, 1.0))
 25 |         entropy = -tf.reduce_sum(self.pi * log_pi, axis=1)
 26 |         entropy_loss = -tf.reduce_mean(entropy) * e_coef
 27 |         policy_loss = -tf.reduce_mean(tf.reduce_sum(log_pi * A_sparse, axis=1) * self.ADV)
 28 |         value_loss = tf.reduce_mean(tf.square(self.R - self.v)) * 0.5 * v_coef
 29 |         self.loss = policy_loss + value_loss + entropy_loss
 30 | 
 31 |         wts = tf.trainable_variables(scope=self.name)
 32 |         grads = tf.gradients(self.loss, wts)
 33 |         if max_grad_norm > 0:
 34 |             grads, self.grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
 35 |         self.lr = tf.placeholder(tf.float32, [])
 36 |         self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr, decay=alpha,
 37 |                                                    epsilon=epsilon)
 38 |         self._train = self.optimizer.apply_gradients(list(zip(grads, wts)))
 39 |         # monitor training
 40 |         summaries = []
 41 |         summaries.append(tf.summary.scalar('loss/%s_entropy_loss' % self.name, entropy_loss))
 42 |         summaries.append(tf.summary.scalar('loss/%s_policy_loss' % self.name, policy_loss))
 43 |         summaries.append(tf.summary.scalar('loss/%s_value_loss' % self.name, value_loss))
 44 |         summaries.append(tf.summary.scalar('loss/%s_total_loss' % self.name, self.loss))
 45 |         summaries.append(tf.summary.scalar('train/%s_lr' % self.name, self.lr))
 46 |         summaries.append(tf.summary.scalar('train/%s_gradnorm' % self.name, self.grad_norm))
 47 |         self.summary = tf.summary.merge(summaries)
 48 | 
 49 |     def _build_actor_head(self, h, agent_name=None):
 50 |         name = 'pi'
 51 |         if agent_name is not None:
 52 |             name += '_' + str(agent_name)
 53 |         pi = fc(h, name, self.n_a, act=tf.nn.softmax)
 54 |         return pi
 55 | 
 56 |     def _build_critic_head(self, h, na, n_n=None, agent_name=None):
 57 |         name = 'v'
 58 |         if agent_name is not None:
 59 |             name += '_' + str(agent_name)
 60 |         if n_n is None:
 61 |             n_n = na.shape[-1]
 62 |         na_sparse = tf.one_hot(na, self.n_a, axis=-1)
 63 |         na_sparse = tf.reshape(na_sparse, [-1, self.n_a*n_n])
 64 |         h = tf.concat([h, na_sparse], 1)
 65 |         v = fc(h, name, 1, act=lambda x: x)
 66 |         return v
 67 | 
 68 | 
 69 | class LstmPolicy(Policy):
 70 |     def __init__(self, n_s, n_a, n_n, n_step, n_fc=64, n_lstm=64, name=None):
 71 |         super().__init__(n_a, n_s, n_step, 'lstm', name)
 72 |         self.n_lstm = n_lstm
 73 |         self.n_fc = n_fc
 74 |         self.n_n = n_n
 75 |         self.ob_fw = tf.placeholder(tf.float32, [1, n_s]) # forward 1-step
 76 |         self.naction_fw = tf.placeholder(tf.int32, [1, n_n])
 77 |         self.done_fw = tf.placeholder(tf.float32, [1])
 78 |         self.ob_bw = tf.placeholder(tf.float32, [n_step, n_s]) # backward n-step
 79 |         self.naction_bw = tf.placeholder(tf.int32, [n_step, n_n])
 80 |         self.done_bw = tf.placeholder(tf.float32, [n_step])
 81 |         self.states = tf.placeholder(tf.float32, [n_lstm * 2])
 82 |         with tf.variable_scope(self.name):
 83 |             self.pi_fw, self.v_fw, self.new_states = self._build_net('forward')
 84 |         with tf.variable_scope(self.name, reuse=True):
 85 |             self.pi, self.v, _ = self._build_net('backward')
 86 |         self._reset()
 87 | 
 88 |     def backward(self, sess, obs, nactions, acts, dones, Rs, Advs, cur_lr,
 89 |                  summary_writer=None, global_step=None):
 90 |         summary, _ = sess.run([self.summary, self._train],
 91 |                               {self.ob_bw: obs,
 92 |                                self.naction_bw: nactions,
 93 |                                self.done_bw: dones,
 94 |                                self.states: self.states_bw,
 95 |                                self.A: acts,
 96 |                                self.ADV: Advs,
 97 |                                self.R: Rs,
 98 |                                self.lr: cur_lr})
 99 |         self.states_bw = np.copy(self.states_fw)
100 |         if summary_writer is not None:
101 |             summary_writer.add_summary(summary, global_step=global_step)
102 | 
103 |     def forward(self, sess, ob, done, naction=None, out_type='p'):
104 |         # update state only when p is called
105 |         ins = {self.ob_fw: np.array([ob]),
106 |                self.done_fw: np.array([done]),
107 |                self.states: self.states_fw}
108 |         if out_type.startswith('p'):
109 |             outs = [self.pi_fw, self.new_states]
110 |         else:
111 |             outs = [self.v_fw]
112 |             ins[self.naction_fw] = np.array([naction])
113 |         out_values = sess.run(outs, ins)
114 |         out_value = out_values[0]
115 |         if out_type.startswith('p'):
116 |             self.states_fw = out_values[-1]
117 |         return out_value
118 | 
119 |     def _build_net(self, in_type):
120 |         if in_type == 'forward':
121 |             ob = self.ob_fw
122 |             done = self.done_fw
123 |             naction = self.naction_fw
124 |         else:
125 |             ob = self.ob_bw
126 |             done = self.done_bw
127 |             naction = self.naction_bw
128 |         h = fc(ob, 'fc', self.n_fc)
129 |         h, new_states = lstm(h, done, self.states, 'lstm')
130 |         pi = self._build_actor_head(h)
131 |         v = self._build_critic_head(h, naction)
132 |         return tf.squeeze(pi), tf.squeeze(v), new_states
133 | 
134 |     def _reset(self):
135 |         # forget the cumulative states every cum_step
136 |         self.states_fw = np.zeros(self.n_lstm * 2, dtype=np.float32)
137 |         self.states_bw = np.zeros(self.n_lstm * 2, dtype=np.float32)
138 | 
139 | 
140 | class FPPolicy(LstmPolicy):
141 |     def __init__(self, n_s, n_a, n_n, n_step, n_fc=64, n_lstm=64, name=None):
142 |         super().__init__(n_s, n_a, n_n, n_step, n_fc, n_lstm, name)
143 | 
144 |     def _build_net(self, in_type):
145 |         if in_type == 'forward':
146 |             ob = self.ob_fw
147 |             done = self.done_fw
148 |             naction = self.naction_fw
149 |         else:
150 |             ob = self.ob_bw
151 |             done = self.done_bw
152 |             naction = self.naction_bw
153 |         n_x = int(self.n_s - self.n_n * self.n_a)
154 |         hx = fc(ob[:,:n_x], 'fcs', self.n_fc)
155 |         hp = fc(ob[:,n_x:], 'fcp', self.n_fc)
156 |         h = tf.concat([hx, hp], axis=1)
157 |         h, new_states = lstm(h, done, self.states, 'lstm')
158 |         pi = self._build_actor_head(h)
159 |         v = self._build_critic_head(h, naction)
160 |         return tf.squeeze(pi), tf.squeeze(v), new_states
161 | 
162 | 
163 | class NCMultiAgentPolicy(Policy):
164 |     """ Inplemented as a centralized agent. To simplify the implementation, all input
165 |     and output dimensions are identical among all agents, and invalid values are casted as
166 |     zeros during runtime."""
167 |     def __init__(self, n_s, n_a, n_agent, n_step, neighbor_mask, n_fc=64, n_h=64):
168 |         super().__init__(n_a, n_s, n_step, 'nc', None)
169 |         self._init_policy(n_agent, neighbor_mask, n_h)
170 | 
171 |     def backward(self, sess, obs, policies, acts, dones, Rs, Advs, cur_lr,
172 |                  summary_writer=None, global_step=None):
173 |         summary, _ = sess.run([self.summary, self._train],
174 |                               {self.ob_bw: obs,
175 |                                self.policy_bw: policies,
176 |                                self.action_bw: acts,
177 |                                self.done_bw: dones,
178 |                                self.states: self.states_bw,
179 |                                self.ADV: Advs,
180 |                                self.R: Rs,
181 |                                self.lr: cur_lr})
182 |         self.states_bw = np.copy(self.states_fw)
183 |         if summary_writer is not None:
184 |             summary_writer.add_summary(summary, global_step=global_step)
185 | 
186 |     def forward(self, sess, ob, done, policy, action=None, out_type='p'):
187 |         # update state only when p is called
188 |         ins = {self.ob_fw: np.expand_dims(ob, axis=1),
189 |                self.done_fw: np.expand_dims(done, axis=1),
190 |                self.policy_fw: np.expand_dims(policy, axis=1),
191 |                self.states: self.states_fw}
192 |         if out_type.startswith('p'):
193 |             outs = [self.pi_fw, self.new_states]
194 |         else:
195 |             outs = [self.v_fw]
196 |             ins[self.action_fw] = np.expand_dims(action, axis=1)
197 |         out_values = sess.run(outs, ins)
198 |         out_value = out_values[0]
199 |         if out_type.startswith('p'):
200 |             self.states_fw = out_values[-1]
201 |         return out_value
202 | 
203 |     def prepare_loss(self, v_coef, e_coef, max_grad_norm, alpha, epsilon):
204 |         self.ADV = tf.placeholder(tf.float32, [self.n_agent, self.n_step])
205 |         self.R = tf.placeholder(tf.float32, [self.n_agent, self.n_step])
206 |         A_sparse = tf.one_hot(self.action_bw, self.n_a)
207 |         # all losses are averaged over steps but summed over agents
208 |         log_pi = tf.log(tf.clip_by_value(self.pi, 1e-10, 1.0))
209 |         entropy = -tf.reduce_sum(self.pi * log_pi, axis=-1)
210 |         entropy_loss = -tf.reduce_sum(tf.reduce_mean(entropy, axis=-1)) * e_coef
211 |         policy_loss = -tf.reduce_sum(tf.reduce_mean(tf.reduce_sum(log_pi * A_sparse, axis=-1) * self.ADV, axis=-1))
212 |         value_loss = tf.reduce_sum(tf.reduce_mean(tf.square(self.R - self.v), axis=-1)) * 0.5 * v_coef
213 |         self.loss = policy_loss + value_loss + entropy_loss
214 | 
215 |         wts = tf.trainable_variables(scope=self.name)
216 |         grads = tf.gradients(self.loss, wts)
217 |         if max_grad_norm > 0:
218 |             grads, self.grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
219 |         self.lr = tf.placeholder(tf.float32, [])
220 |         self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr, decay=alpha,
221 |                                                    epsilon=epsilon)
222 |         self._train = self.optimizer.apply_gradients(list(zip(grads, wts)))
223 |         # monitor training
224 |         summaries = []
225 |         summaries.append(tf.summary.scalar('loss/%s_entropy_loss' % self.name, entropy_loss))
226 |         summaries.append(tf.summary.scalar('loss/%s_policy_loss' % self.name, policy_loss))
227 |         summaries.append(tf.summary.scalar('loss/%s_value_loss' % self.name, value_loss))
228 |         summaries.append(tf.summary.scalar('loss/%s_total_loss' % self.name, self.loss))
229 |         summaries.append(tf.summary.scalar('train/%s_lr' % self.name, self.lr))
230 |         summaries.append(tf.summary.scalar('train/%s_gradnorm' % self.name, self.grad_norm))
231 |         self.summary = tf.summary.merge(summaries)
232 | 
233 |     def _build_net(self, in_type):
234 |         if in_type == 'forward':
235 |             ob = self.ob_fw
236 |             policy = self.policy_fw
237 |             action = self.action_fw
238 |             done = self.done_fw
239 |         else:
240 |             ob = self.ob_bw
241 |             policy = self.policy_bw
242 |             action = self.action_bw
243 |             done = self.done_bw
244 |         h, new_states = lstm_comm_new(ob, policy, done, self.neighbor_mask, self.states, 'lstm_comm')
245 |         pi_ls = []
246 |         v_ls = []
247 |         for i in range(self.n_agent):
248 |             h_i = h[i] # Txn_h
249 |             naction_i = tf.transpose(tf.boolean_mask(action, self.neighbor_mask[i])) # Txn_n
250 |             pi = self._build_actor_head(h_i, agent_name='%d' % i)
251 |             v = self._build_critic_head(h_i, naction_i, n_n=int(np.sum(self.neighbor_mask[i])),
252 |                                         agent_name='%d' % i)
253 |             pi_ls.append(tf.expand_dims(pi, axis=0))
254 |             v_ls.append(tf.expand_dims(v, axis=0))
255 |         return tf.squeeze(tf.concat(pi_ls, axis=0)), tf.squeeze(tf.concat(v_ls, axis=0)), new_states
256 | 
257 |     def _init_policy(self, n_agent, neighbor_mask, n_h):
258 |         self.n_agent = n_agent
259 |         self.neighbor_mask = neighbor_mask #n_agent x n_agent
260 |         self.n_h = n_h
261 |         self.ob_fw = tf.placeholder(tf.float32, [n_agent, 1, self.n_s]) # forward 1-step
262 |         self.policy_fw = tf.placeholder(tf.float32, [n_agent, 1, self.n_a])
263 |         self.action_fw = tf.placeholder(tf.int32, [n_agent, 1])
264 |         self.done_fw = tf.placeholder(tf.float32, [1])
265 |         self.ob_bw = tf.placeholder(tf.float32, [n_agent, self.n_step, self.n_s]) # backward n-step
266 |         self.policy_bw = tf.placeholder(tf.float32, [n_agent, self.n_step, self.n_a])
267 |         self.action_bw = tf.placeholder(tf.int32, [n_agent, self.n_step])
268 |         self.done_bw = tf.placeholder(tf.float32, [self.n_step])
269 |         self.states = tf.placeholder(tf.float32, [n_agent, n_h * 2])
270 | 
271 |         with tf.variable_scope(self.name):
272 |             self.pi_fw, self.v_fw, self.new_states = self._build_net('forward')
273 |         with tf.variable_scope(self.name, reuse=True):
274 |             self.pi, self.v, _ = self._build_net('backward')
275 |         self._reset()
276 | 
277 |     def _reset(self):
278 |         self.states_fw = np.zeros((self.n_agent, self.n_h * 2), dtype=np.float32)
279 |         self.states_bw = np.zeros((self.n_agent, self.n_h * 2), dtype=np.float32)
280 | 
281 | 
282 | class ConsensusPolicy(NCMultiAgentPolicy):
283 |     def __init__(self, n_s, n_a, n_agent, n_step, neighbor_mask, n_fc=64, n_h=64):
284 |         Policy.__init__(self, n_a, n_s, n_step, 'cu', None)
285 |         self.n_agent = n_agent
286 |         self.n_h = n_h
287 |         self.neighbor_mask = neighbor_mask
288 |         self._init_policy(n_agent, neighbor_mask, n_h)
289 | 
290 |     def backward(self, sess, obs, policies, acts, dones, Rs, Advs, cur_lr,
291 |                  summary_writer=None, global_step=None):
292 |         super().backward(sess, obs, policies, acts, dones, Rs, Advs, cur_lr,
293 |                          summary_writer, global_step)
294 |         sess.run(self._consensus_update)
295 | 
296 |     def prepare_loss(self, v_coef, e_coef, max_grad_norm, alpha, epsilon):
297 |         super().prepare_loss(v_coef, e_coef, max_grad_norm, alpha, epsilon)
298 |         consensus_update = []
299 |         for i in range(self.n_agent):
300 |             wt_from, wt_to = self._get_critic_wts(i)
301 |             for w1, w2 in zip(wt_from, wt_to):
302 |                 consensus_update.append(w2.assign(w1))
303 |         self._consensus_update = tf.group(*consensus_update)
304 | 
305 |     def _build_net(self, in_type):
306 |         if in_type == 'forward':
307 |             ob = self.ob_fw
308 |             done = self.done_fw
309 |             action = self.action_fw
310 |         else:
311 |             ob = self.ob_bw
312 |             done = self.done_bw
313 |             action = self.action_bw
314 |         pi_ls = []
315 |         v_ls = []
316 |         new_states_ls = []
317 |         for i in range(self.n_agent):
318 |             h = fc(ob[i], 'fc_%d' % i, self.n_h)
319 |             h, new_states = lstm(h, done, self.states[i], 'lstm_%d' % i)
320 |             pi = self._build_actor_head(h, agent_name='%d' % i)
321 |             naction = tf.transpose(tf.boolean_mask(action, self.neighbor_mask[i]))
322 |             v = self._build_critic_head(h, naction, n_n=int(np.sum(self.neighbor_mask[i])), agent_name='%d' % i)
323 |             pi_ls.append(tf.expand_dims(pi, axis=0))
324 |             v_ls.append(tf.expand_dims(v, axis=0))
325 |             new_states_ls.append(tf.expand_dims(new_states, axis=0))
326 |         pi_ls = tf.squeeze(tf.concat(pi_ls, axis=0))
327 |         v_ls = tf.squeeze(tf.concat(v_ls, axis=0))
328 |         new_states_ls = tf.squeeze(tf.concat(new_states_ls, axis=0))
329 |         return pi_ls, v_ls, new_states_ls
330 | 
331 |     def _get_critic_wts(self, agent_i):
332 |         neighbor_mask = self.neighbor_mask[agent_i]
333 |         agents = [agent_i] + list(np.where(neighbor_mask == 1)[0])
334 |         wt_i = []
335 |         wt_n = []
336 |         for i in agents:
337 |             critic_scope = [self.name + ('/lstm_%d' % i)]
338 |             wt = []
339 |             for scope in critic_scope:
340 |                 wt += tf.trainable_variables(scope=scope)
341 |             if i == agent_i:
342 |                 wt_i = wt
343 |             wt_n.append(wt)
344 |         mean_wt_n = []
345 |         n_n = len(wt_n)
346 |         n_w = len(wt_n[0])
347 |         for i in range(n_w):
348 |             cur_wts = []
349 |             for j in range(n_n):
350 |                 cur_wts.append(tf.expand_dims(wt_n[j][i], axis=-1))
351 |             cur_wts = tf.concat(cur_wts, axis=-1)
352 |             cur_wts = tf.reduce_mean(cur_wts, axis=-1)
353 |             mean_wt_n.append(cur_wts)
354 |         return mean_wt_n, wt_i
355 | 
356 | 
357 | class IC3MultiAgentPolicy(NCMultiAgentPolicy):
358 |     """Reference code: https://github.com/IC3Net/IC3Net/blob/master/comm.py.
359 |        Note in IC3, the message is generated from hidden state only, so current state
360 |        and neigbor policies are not included in the inputs."""
361 |     def __init__(self, n_s, n_a, n_agent, n_step, neighbor_mask, n_fc=64, n_h=64):
362 |         Policy.__init__(self, n_a, n_s, n_step, 'ic3', None)
363 |         self._init_policy(n_agent, neighbor_mask, n_h)
364 | 
365 |     def _build_net(self, in_type):
366 |         if in_type == 'forward':
367 |             ob = self.ob_fw
368 |             action = self.action_fw
369 |             done = self.done_fw
370 |         else:
371 |             ob = self.ob_bw
372 |             action = self.action_bw
373 |             done = self.done_bw
374 |         h, new_states = lstm_ic3(ob, done, self.neighbor_mask, self.states, 'lstm_ic3')
375 |         pi_ls = []
376 |         v_ls = []
377 |         for i in range(self.n_agent):
378 |             h_i = h[i] # Txn_h
379 |             naction_i = tf.transpose(tf.boolean_mask(action, self.neighbor_mask[i])) # Txn_n
380 |             pi = self._build_actor_head(h_i, agent_name='%d' % i)
381 |             v = self._build_critic_head(h_i, naction_i, n_n=int(np.sum(self.neighbor_mask[i])),
382 |                                         agent_name='%d' % i)
383 |             pi_ls.append(tf.expand_dims(pi, axis=0))
384 |             v_ls.append(tf.expand_dims(v, axis=0))
385 |         return tf.squeeze(tf.concat(pi_ls, axis=0)), tf.squeeze(tf.concat(v_ls, axis=0)), new_states
386 | 
387 | 
388 | class DIALMultiAgentPolicy(NCMultiAgentPolicy):
389 |     def __init__(self, n_s, n_a, n_agent, n_step, neighbor_mask, n_fc=64, n_h=64):
390 |         Policy.__init__(self, n_a, n_s, n_step, 'dial', None)
391 |         self._init_policy(n_agent, neighbor_mask, n_h)
392 | 
393 |     def _build_net(self, in_type):
394 |         if in_type == 'forward':
395 |             ob = self.ob_fw
396 |             policy = self.policy_fw
397 |             action = self.action_fw
398 |             done = self.done_fw
399 |         else:
400 |             ob = self.ob_bw
401 |             policy = self.policy_bw
402 |             action = self.action_bw
403 |             done = self.done_bw
404 |         h, new_states = lstm_dial(ob, policy, done, self.neighbor_mask, self.states, 'lstm_comm')
405 |         pi_ls = []
406 |         v_ls = []
407 |         for i in range(self.n_agent):
408 |             h_i = h[i] # Txn_h
409 |             naction_i = tf.transpose(tf.boolean_mask(action, self.neighbor_mask[i])) # Txn_n
410 |             pi = self._build_actor_head(h_i, agent_name='%d' % i)
411 |             v = self._build_critic_head(h_i, naction_i, n_n=int(np.sum(self.neighbor_mask[i])),
412 |                                         agent_name='%d' % i)
413 |             pi_ls.append(tf.expand_dims(pi, axis=0))
414 |             v_ls.append(tf.expand_dims(v, axis=0))
415 |         return tf.squeeze(tf.concat(pi_ls, axis=0)), tf.squeeze(tf.concat(v_ls, axis=0)), new_states
416 | 
417 | 


--------------------------------------------------------------------------------
/agents/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | 
  4 | """
  5 | initializers
  6 | """
  7 | DEFAULT_SCALE = np.sqrt(2)
  8 | DEFAULT_MODE = 'fan_in'
  9 | 
 10 | def ortho_init(scale=DEFAULT_SCALE, mode=None):
 11 |     def _ortho_init(shape, dtype, partition_info=None):
 12 |         # lasagne ortho init for tf
 13 |         shape = tuple(shape)
 14 |         if len(shape) == 2: # fc: in, out
 15 |             flat_shape = shape
 16 |         elif (len(shape) == 3) or (len(shape) == 4): # 1d/2dcnn: (in_h), in_w, in_c, out
 17 |             flat_shape = (np.prod(shape[:-1]), shape[-1])
 18 |         a = np.random.standard_normal(flat_shape)
 19 |         u, _, v = np.linalg.svd(a, full_matrices=False)
 20 |         q = u if u.shape == flat_shape else v # pick the one with the correct shape
 21 |         q = q.reshape(shape)
 22 |         return (scale * q).astype(np.float32)
 23 |     return _ortho_init
 24 | 
 25 | 
 26 | def norm_init(scale=DEFAULT_SCALE, mode=DEFAULT_MODE):
 27 |     def _norm_init(shape, dtype, partition_info=None):
 28 |         shape = tuple(shape)
 29 |         if len(shape) == 2:
 30 |             n_in = shape[0]
 31 |         elif (len(shape) == 3) or (len(shape) == 4):
 32 |             n_in = np.prod(shape[:-1])
 33 |         a = np.random.standard_normal(shape)
 34 |         if mode == 'fan_in':
 35 |             n = n_in
 36 |         elif mode == 'fan_out':
 37 |             n = shape[-1]
 38 |         elif mode == 'fan_avg':
 39 |             n = 0.5 * (n_in + shape[-1])
 40 |         return (scale * a / np.sqrt(n)).astype(np.float32)
 41 | 
 42 | DEFAULT_METHOD = ortho_init
 43 | """
 44 | layers
 45 | """
 46 | def conv(x, scope, n_out, f_size, stride=1, pad='VALID', f_size_w=None, act=tf.nn.relu,
 47 |          conv_dim=1, init_scale=DEFAULT_SCALE, init_mode=None, init_method=DEFAULT_METHOD):
 48 |     with tf.variable_scope(scope):
 49 |         b = tf.get_variable("b", [n_out], initializer=tf.constant_initializer(0.0))
 50 |         if conv_dim == 1:
 51 |             n_c = x.shape[2].value
 52 |             w = tf.get_variable("w", [f_size, n_c, n_out],
 53 |                                 initializer=init_method(init_scale, init_mode))
 54 |             z = tf.nn.conv1d(x, w, stride=stride, padding=pad) + b
 55 |         elif conv_dim == 2:
 56 |             n_c = x.shape[3].value
 57 |             if f_size_w is None:
 58 |                 f_size_w = f_size
 59 |             w = tf.get_variable("w", [f_size, f_size_w, n_c, n_out],
 60 |                                 initializer=init_method(init_scale, init_mode))
 61 |             z = tf.nn.conv2d(x, w, strides=[1, stride, stride, 1], padding=pad) + b
 62 |         return act(z)
 63 | 
 64 | 
 65 | def fc(x, scope, n_out, act=tf.nn.relu, init_scale=DEFAULT_SCALE,
 66 |        init_mode=DEFAULT_MODE, init_method=DEFAULT_METHOD):
 67 |     with tf.variable_scope(scope):
 68 |         n_in = x.shape[1].value
 69 |         w = tf.get_variable("w", [n_in, n_out],
 70 |                             initializer=init_method(init_scale, init_mode))
 71 |         b = tf.get_variable("b", [n_out], initializer=tf.constant_initializer(0.0))
 72 |         z = tf.matmul(x, w) + b
 73 |         return act(z)
 74 | 
 75 | 
 76 | def batch_to_seq(x):
 77 |     n_step = x.shape[0].value
 78 |     if len(x.shape) == 1:
 79 |         x = tf.expand_dims(x, -1)
 80 |     return tf.split(axis=0, num_or_size_splits=n_step, value=x)
 81 | 
 82 | 
 83 | def seq_to_batch(x):
 84 |     return tf.concat(x, axis=0)
 85 | 
 86 | 
 87 | def lstm(xs, dones, s, scope, init_scale=DEFAULT_SCALE, init_mode=DEFAULT_MODE,
 88 |          init_method=DEFAULT_METHOD):
 89 |     xs = batch_to_seq(xs)
 90 |     # need dones to reset states
 91 |     dones = batch_to_seq(dones)
 92 |     n_in = xs[0].shape[1].value
 93 |     n_out = s.shape[0] // 2
 94 |     with tf.variable_scope(scope):
 95 |         wx = tf.get_variable("wx", [n_in, n_out*4],
 96 |                              initializer=init_method(init_scale, init_mode))
 97 |         wh = tf.get_variable("wh", [n_out, n_out*4],
 98 |                              initializer=init_method(init_scale, init_mode))
 99 |         b = tf.get_variable("b", [n_out*4], initializer=tf.constant_initializer(0.0))
100 |     s = tf.expand_dims(s, 0)
101 |     c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
102 |     for ind, (x, done) in enumerate(zip(xs, dones)):
103 |         c = c * (1-done)
104 |         h = h * (1-done)
105 |         z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
106 |         i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
107 |         i = tf.nn.sigmoid(i)
108 |         f = tf.nn.sigmoid(f)
109 |         o = tf.nn.sigmoid(o)
110 |         u = tf.tanh(u)
111 |         c = f*c + i*u
112 |         h = o*tf.tanh(c)
113 |         xs[ind] = h
114 |     s = tf.concat(axis=1, values=[c, h])
115 |     return seq_to_batch(xs), tf.squeeze(s)
116 | 
117 | 
118 | def lstm_comm(xs, ps, dones, masks, s, scope, init_scale=DEFAULT_SCALE, init_mode=DEFAULT_MODE,
119 |               init_method=DEFAULT_METHOD):
120 |     n_agent = s.shape[0]
121 |     n_h = s.shape[1] // 2
122 |     n_s = xs.shape[-1]
123 |     n_a = ps.shape[-1]
124 |     xs = tf.transpose(xs, perm=[1,0,2]) # TxNxn_s
125 |     xs = batch_to_seq(xs)
126 |     ps = tf.transpose(ps, perm=[1,0,2]) # TxNxn_a
127 |     ps = batch_to_seq(ps)
128 |     # need dones to reset states
129 |     dones = batch_to_seq(dones) # Tx1
130 |     # create wts
131 |     n_in_msg = n_h + n_s + n_a
132 |     w_msg = []
133 |     b_msg = []
134 |     wx_hid = []
135 |     wh_hid = []
136 |     b_hid = []
137 |     for i in range(n_agent):
138 |         n_m = np.sum(masks[i])
139 |         n_in_hid = n_s + n_h*n_m
140 |         with tf.variable_scope(scope + ('_%d' % i)):
141 |             w_msg.append(tf.get_variable("w_msg", [n_in_msg, n_h],
142 |                                          initializer=init_method(init_scale, init_mode)))
143 |             b_msg.append(tf.get_variable("b_msg", [n_h],
144 |                                          initializer=tf.constant_initializer(0.0)))
145 |             wx_hid.append(tf.get_variable("wx_hid", [n_in_hid, n_h*4],
146 |                                           initializer=init_method(init_scale, init_mode)))
147 |             wh_hid.append(tf.get_variable("wh_hid", [n_h, n_h*4],
148 |                                           initializer=init_method(init_scale, init_mode)))
149 |             b_hid.append(tf.get_variable("b_hid", [n_h*4],
150 |                                          initializer=tf.constant_initializer(0.0)))
151 |     c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
152 |     # loop over steps
153 |     for t, (x, p, done) in enumerate(zip(xs, ps, dones)):
154 |         # abuse 1 agent as 1 step
155 |         x = batch_to_seq(tf.squeeze(x, axis=0))
156 |         p = batch_to_seq(tf.squeeze(p, axis=0))
157 |         out_h = []
158 |         out_c = []
159 |         out_m = []
160 |         # communication phase
161 |         for i, (xi, pi) in enumerate(zip(x, p)):
162 |             hi = tf.expand_dims(h[i], axis=0)
163 |             si = tf.concat([hi, xi, pi], axis=1)
164 |             mi = tf.nn.relu(tf.matmul(si, w_msg[i]) + b_msg[i])
165 |             out_m.append(mi)
166 |         out_m = tf.concat(out_m, axis=0) # Nxn_h
167 |         # hidden phase
168 |         for i, xi in enumerate(x):
169 |             ci = tf.expand_dims(c[i], axis=0)
170 |             hi = tf.expand_dims(h[i], axis=0)
171 |             # reset states for a new episode
172 |             ci = ci * (1-done)
173 |             hi = hi * (1-done)
174 |             # receive neighbor messages
175 |             mi = tf.expand_dims(tf.reshape(tf.boolean_mask(out_m, masks[i]), [-1]), axis=0)
176 |             # TODO: add additional encoding layers here
177 |             si = tf.concat([xi, mi], axis=1)
178 |             zi = tf.matmul(si, wx_hid[i]) + tf.matmul(hi, wh_hid[i]) + b_hid[i]
179 |             ii, fi, oi, ui = tf.split(axis=1, num_or_size_splits=4, value=zi)
180 |             ii = tf.nn.sigmoid(ii)
181 |             fi = tf.nn.sigmoid(fi)
182 |             oi = tf.nn.sigmoid(oi)
183 |             ui = tf.tanh(ui)
184 |             ci = fi*ci + ii*ui
185 |             hi = oi*tf.tanh(ci)
186 |             out_h.append(hi)
187 |             out_c.append(ci)
188 |         c = tf.concat(out_c, axis=0)
189 |         h = tf.concat(out_h, axis=0)
190 |         xs[t] = tf.expand_dims(h, axis=0)
191 |     s = tf.concat(axis=1, values=[c, h])
192 |     xs = seq_to_batch(xs) # TxNxn_h
193 |     xs = tf.transpose(xs, perm=[1,0,2]) # NxTxn_h
194 |     return xs, s
195 | 
196 | 
197 | def lstm_comm_new(xs, ps, dones, masks, s, scope, init_scale=DEFAULT_SCALE, init_mode=DEFAULT_MODE,
198 |                   init_method=DEFAULT_METHOD):
199 |     n_agent = s.shape[0]
200 |     n_h = s.shape[1] // 2
201 |     n_s = xs.shape[-1]
202 |     n_a = ps.shape[-1]
203 |     xs = tf.transpose(xs, perm=[1,0,2]) # TxNxn_s
204 |     xs = batch_to_seq(xs)
205 |     ps = tf.transpose(ps, perm=[1,0,2]) # TxNxn_a
206 |     ps = batch_to_seq(ps)
207 |     # need dones to reset states
208 |     dones = batch_to_seq(dones) # Tx1
209 |     # create wts
210 |     w_msg = []
211 |     b_msg = []
212 |     w_ob = []
213 |     b_ob = []
214 |     # w_fp = []
215 |     # b_fp = []
216 |     wx_hid = []
217 |     wh_hid = []
218 |     b_hid = []
219 |     n_in_hid = 3*n_h
220 |     for i in range(n_agent):
221 |         n_m = np.sum(masks[i])
222 |         # n_in_hid = (n_m+1)*n_h
223 |         with tf.variable_scope(scope + ('_%d' % i)):
224 |             w_msg.append(tf.get_variable("w_msg", [n_h*n_m, n_h],
225 |                                          initializer=init_method(init_scale, init_mode)))
226 |             b_msg.append(tf.get_variable("b_msg", [n_h],
227 |                                          initializer=tf.constant_initializer(0.0)))
228 |             w_ob.append(tf.get_variable("w_ob", [n_s*(n_m+1), n_h],
229 |                                         initializer=init_method(init_scale, init_mode)))
230 |             b_ob.append(tf.get_variable("b_ob", [n_h],
231 |                                         initializer=tf.constant_initializer(0.0)))
232 |             # w_fp.append(tf.get_variable("w_fp", [n_a*n_m, n_h],
233 |                                         # initializer=init_method(init_scale, init_mode)))
234 |             # b_fp.append(tf.get_variable("b_fp", [n_h],
235 |                                         # initializer=tf.constant_initializer(0.0)))
236 |             wx_hid.append(tf.get_variable("wx_hid", [n_in_hid, n_h*4],
237 |                                           initializer=init_method(init_scale, init_mode)))
238 |             wh_hid.append(tf.get_variable("wh_hid", [n_h, n_h*4],
239 |                                           initializer=init_method(init_scale, init_mode)))
240 |             b_hid.append(tf.get_variable("b_hid", [n_h*4],
241 |                                          initializer=tf.constant_initializer(0.0)))
242 |     c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
243 |     # loop over steps
244 |     for t, (x, p, done) in enumerate(zip(xs, ps, dones)):
245 |         # abuse 1 agent as 1 step
246 |         x = tf.squeeze(x, axis=0)
247 |         p = tf.squeeze(p, axis=0)
248 |         # x = batch_to_seq(tf.squeeze(x, axis=0))
249 |         # p = batch_to_seq(tf.squeeze(p, axis=0))
250 |         out_h = []
251 |         out_c = []
252 |         out_m = []
253 |         # communication phase
254 |         for i in range(n_agent):
255 |             hi = tf.expand_dims(h[i], axis=0)
256 |             # hxi = fc(xi, 'mfc_s_%d' % i, n_h, act=tf.nn.tanh)
257 |             # hpi = fc(pi, 'mfc_p_%d' % i, n_h, act=tf.nn.tanh)
258 |             # si = tf.concat([hi, hxi, hpi], axis=1)
259 |             mi = fc(hi, 'mfc_%d' % i, n_h)
260 |             out_m.append(mi)
261 |         # out_m = [tf.expand_dims(h[i], axis=0) for i in range(n_agent)]
262 |         out_m = tf.concat(out_m, axis=0) # Nxn_h
263 |         # hidden phase
264 |         for i in range(n_agent):
265 |             ci = tf.expand_dims(c[i], axis=0)
266 |             hi = tf.expand_dims(h[i], axis=0)
267 |             # reset states for a new episode
268 |             ci = ci * (1-done)
269 |             hi = hi * (1-done)
270 |             # receive neighbor messages
271 |             mi = tf.expand_dims(tf.reshape(tf.boolean_mask(out_m, masks[i]), [-1]), axis=0)
272 |             # pi = tf.expand_dims(tf.reshape(tf.boolean_mask(p, masks[i]), [-1]), axis=0)
273 |             xi = tf.expand_dims(tf.reshape(tf.boolean_mask(x, masks[i]), [-1]), axis=0)
274 |             xi = tf.concat([tf.expand_dims(x[i], axis=0), xi], axis=1)
275 |             hxi = tf.nn.relu(tf.matmul(xi, w_ob[i]) + b_ob[i])
276 |             # hpi = tf.nn.relu(tf.matmul(pi, w_fp[i]) + b_fp[i])
277 |             hmi = tf.matmul(mi, w_msg[i]) + b_msg[i]
278 |             # si = tf.concat([hxi, hpi, hmi], axis=1)
279 |             si = tf.concat([hxi, hmi], axis=1)
280 |             zi = tf.matmul(si, wx_hid[i]) + tf.matmul(hi, wh_hid[i]) + b_hid[i]
281 |             ii, fi, oi, ui = tf.split(axis=1, num_or_size_splits=4, value=zi)
282 |             ii = tf.nn.sigmoid(ii)
283 |             fi = tf.nn.sigmoid(fi)
284 |             oi = tf.nn.sigmoid(oi)
285 |             ui = tf.tanh(ui)
286 |             ci = fi*ci + ii*ui
287 |             hi = oi*tf.tanh(ci)
288 |             out_h.append(hi)
289 |             out_c.append(ci)
290 |         c = tf.concat(out_c, axis=0)
291 |         h = tf.concat(out_h, axis=0)
292 |         xs[t] = tf.expand_dims(h, axis=0)
293 |     s = tf.concat(axis=1, values=[c, h])
294 |     xs = seq_to_batch(xs) # TxNxn_h
295 |     xs = tf.transpose(xs, perm=[1,0,2]) # NxTxn_h
296 |     return xs, s
297 | 
298 | def lstm_ic3(xs, dones, masks, s, scope, init_scale=DEFAULT_SCALE, init_mode=DEFAULT_MODE,
299 |              init_method=DEFAULT_METHOD):
300 |     n_agent = s.shape[0]
301 |     n_h = s.shape[1] // 2
302 |     n_s = xs.shape[-1]
303 |     xs = tf.transpose(xs, perm=[1,0,2]) # TxNxn_s
304 |     xs = batch_to_seq(xs)
305 |     # need dones to reset states
306 |     dones = batch_to_seq(dones) # Tx1
307 |     # create wts
308 |     w_msg = []
309 |     b_msg = []
310 |     w_ob = []
311 |     b_ob = []
312 |     wx_hid = []
313 |     wh_hid = []
314 |     b_hid = []
315 |     for i in range(n_agent):
316 |         with tf.variable_scope(scope + ('_%d' % i)):
317 |             w_msg.append(tf.get_variable("w_msg", [n_h, n_h],
318 |                                          initializer=init_method(init_scale, init_mode)))
319 |             b_msg.append(tf.get_variable("b_msg", [n_h],
320 |                                          initializer=tf.constant_initializer(0.0)))
321 |             w_ob.append(tf.get_variable("w_ob", [n_s, n_h],
322 |                                         initializer=init_method(init_scale, init_mode)))
323 |             b_ob.append(tf.get_variable("b_ob", [n_h],
324 |                                         initializer=tf.constant_initializer(0.0)))
325 |             wx_hid.append(tf.get_variable("wx_hid", [n_h, n_h*4],
326 |                                           initializer=init_method(init_scale, init_mode)))
327 |             wh_hid.append(tf.get_variable("wh_hid", [n_h, n_h*4],
328 |                                           initializer=init_method(init_scale, init_mode)))
329 |             b_hid.append(tf.get_variable("b_hid", [n_h*4],
330 |                                          initializer=tf.constant_initializer(0.0)))
331 |     c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
332 |     # loop over steps
333 |     for t, (x, done) in enumerate(zip(xs, dones)):
334 |         # abuse 1 agent as 1 step
335 |         x = batch_to_seq(tf.squeeze(x, axis=0))
336 |         out_h = []
337 |         out_c = []
338 |         out_m = [tf.expand_dims(h[i], axis=0) for i in range(n_agent)]
339 |         out_m = tf.concat(out_m, axis=0) # Nxn_h
340 |         # hidden phase
341 |         for i, xi in enumerate(x):
342 |             ci = tf.expand_dims(c[i], axis=0)
343 |             hi = tf.expand_dims(h[i], axis=0)
344 |             # reset states for a new episode
345 |             ci = ci * (1-done)
346 |             hi = hi * (1-done)
347 |             # receive neighbor messages
348 |             mi = tf.reduce_mean(tf.boolean_mask(out_m, masks[i]), axis=0, keepdims=True)
349 |             # the state encoder in IC3 code is not consistent with that described in the paper.
350 |             # Here we follow the impelmentation in the paper.
351 |             si = tf.nn.tanh(tf.matmul(xi, w_ob[i]) + b_ob[i]) + tf.matmul(mi, w_msg[i]) + b_msg[i]
352 |             zi = tf.matmul(si, wx_hid[i]) + tf.matmul(hi, wh_hid[i]) + b_hid[i]
353 |             ii, fi, oi, ui = tf.split(axis=1, num_or_size_splits=4, value=zi)
354 |             ii = tf.nn.sigmoid(ii)
355 |             fi = tf.nn.sigmoid(fi)
356 |             oi = tf.nn.sigmoid(oi)
357 |             ui = tf.tanh(ui)
358 |             ci = fi*ci + ii*ui
359 |             hi = oi*tf.tanh(ci)
360 |             out_h.append(hi)
361 |             out_c.append(ci)
362 |         c = tf.concat(out_c, axis=0)
363 |         h = tf.concat(out_h, axis=0)
364 |         xs[t] = tf.expand_dims(h, axis=0)
365 |     s = tf.concat(axis=1, values=[c, h])
366 |     xs = seq_to_batch(xs) # TxNxn_h
367 |     xs = tf.transpose(xs, perm=[1,0,2]) # NxTxn_h
368 |     return xs, s
369 | 
370 | 
371 | def lstm_dial(xs, ps, dones, masks, s, scope, init_scale=DEFAULT_SCALE, init_mode=DEFAULT_MODE,
372 |               init_method=DEFAULT_METHOD):
373 |     n_agent = s.shape[0]
374 |     n_h = s.shape[1] // 2
375 |     n_s = xs.shape[-1]
376 |     n_a = ps.shape[-1]
377 |     xs = tf.transpose(xs, perm=[1,0,2]) # TxNxn_s
378 |     xs = batch_to_seq(xs)
379 |     ps = tf.transpose(ps, perm=[1,0,2]) # TxNxn_a
380 |     ps = batch_to_seq(ps)
381 |     # need dones to reset states
382 |     dones = batch_to_seq(dones) # Tx1
383 |     # create wts
384 |     w_msg = []
385 |     b_msg = []
386 |     w_ob = []
387 |     b_ob = []
388 |     wx_hid = []
389 |     wh_hid = []
390 |     b_hid = []
391 |     for i in range(n_agent):
392 |         n_m = np.sum(masks[i])
393 |         # n_in_hid = (n_m+1)*n_h
394 |         with tf.variable_scope(scope + ('_%d' % i)):
395 |             w_msg.append(tf.get_variable("w_msg", [n_h*n_m, n_h],
396 |                                          initializer=init_method(init_scale, init_mode)))
397 |             b_msg.append(tf.get_variable("b_msg", [n_h],
398 |                                          initializer=tf.constant_initializer(0.0)))
399 |             w_ob.append(tf.get_variable("w_ob", [n_s*(n_m+1), n_h],
400 |                                         initializer=init_method(init_scale, init_mode)))
401 |             b_ob.append(tf.get_variable("b_ob", [n_h],
402 |                                         initializer=tf.constant_initializer(0.0)))
403 |             wx_hid.append(tf.get_variable("wx_hid", [n_h, n_h*4],
404 |                                           initializer=init_method(init_scale, init_mode)))
405 |             wh_hid.append(tf.get_variable("wh_hid", [n_h, n_h*4],
406 |                                           initializer=init_method(init_scale, init_mode)))
407 |             b_hid.append(tf.get_variable("b_hid", [n_h*4],
408 |                                          initializer=tf.constant_initializer(0.0)))
409 |     c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
410 |     # loop over steps
411 |     for t, (x, p, done) in enumerate(zip(xs, ps, dones)):
412 |         # abuse 1 agent as 1 step
413 |         x = tf.squeeze(x, axis=0)
414 |         p = tf.squeeze(p, axis=0)
415 |         out_h = []
416 |         out_c = []
417 |         out_m = []
418 |         # communication phase
419 |         for i in range(n_agent):
420 |             hi = tf.expand_dims(h[i], axis=0)
421 |             mi = fc(hi, 'mfc_%d' % i, n_h)
422 |             out_m.append(mi)
423 |         out_m = tf.concat(out_m, axis=0) # Nxn_h
424 |         # hidden phase
425 |         for i in range(n_agent):
426 |             ci = tf.expand_dims(c[i], axis=0)
427 |             hi = tf.expand_dims(h[i], axis=0)
428 |             # reset states for a new episode
429 |             ci = ci * (1-done)
430 |             hi = hi * (1-done)
431 |             # receive neighbor messages
432 |             mi = tf.expand_dims(tf.reshape(tf.boolean_mask(out_m, masks[i]), [-1]), axis=0)
433 |             ai = tf.one_hot(tf.expand_dims(tf.argmax(p[i]), axis=0), n_h)
434 |             xi = tf.expand_dims(tf.reshape(tf.boolean_mask(x, masks[i]), [-1]), axis=0)
435 |             xi = tf.concat([tf.expand_dims(x[i], axis=0), xi], axis=1)
436 |             hxi = tf.nn.relu(tf.matmul(xi, w_ob[i]) + b_ob[i])
437 |             hmi = tf.nn.relu(tf.matmul(mi, w_msg[i]) + b_msg[i])
438 |             si = hxi + hmi + ai
439 |             zi = tf.matmul(si, wx_hid[i]) + tf.matmul(hi, wh_hid[i]) + b_hid[i]
440 |             ii, fi, oi, ui = tf.split(axis=1, num_or_size_splits=4, value=zi)
441 |             ii = tf.nn.sigmoid(ii)
442 |             fi = tf.nn.sigmoid(fi)
443 |             oi = tf.nn.sigmoid(oi)
444 |             ui = tf.tanh(ui)
445 |             ci = fi*ci + ii*ui
446 |             hi = oi*tf.tanh(ci)
447 |             out_h.append(hi)
448 |             out_c.append(ci)
449 |         c = tf.concat(out_c, axis=0)
450 |         h = tf.concat(out_h, axis=0)
451 |         xs[t] = tf.expand_dims(h, axis=0)
452 |     s = tf.concat(axis=1, values=[c, h])
453 |     xs = seq_to_batch(xs) # TxNxn_h
454 |     xs = tf.transpose(xs, perm=[1,0,2]) # NxTxn_h
455 |     return xs, s
456 | 
457 | 
458 | """
459 | buffers
460 | """
461 | class TransBuffer:
462 |     def reset(self):
463 |         self.buffer = []
464 | 
465 |     @property
466 |     def size(self):
467 |         return len(self.buffer)
468 | 
469 |     def add_transition(self, ob, a, r, *_args, **_kwargs):
470 |         raise NotImplementedError()
471 | 
472 |     def sample_transition(self, *_args, **_kwargs):
473 |         raise NotImplementedError()
474 | 
475 | 
476 | class OnPolicyBuffer(TransBuffer):
477 |     def __init__(self, gamma, alpha, distance_mask):
478 |         self.gamma = gamma
479 |         self.alpha = alpha
480 |         if alpha > 0:
481 |             self.distance_mask = distance_mask
482 |             self.max_distance = np.max(distance_mask, axis=-1)
483 |         self.reset()
484 | 
485 |     def reset(self, done=False):
486 |         # the done before each step is required
487 |         self.obs = []
488 |         self.acts = []
489 |         self.rs = []
490 |         self.vs = []
491 |         self.adds = []
492 |         self.dones = [done]
493 | 
494 |     def add_transition(self, ob, na, a, r, v, done):
495 |         self.obs.append(ob)
496 |         self.adds.append(na)
497 |         self.acts.append(a)
498 |         self.rs.append(r)
499 |         self.vs.append(v)
500 |         self.dones.append(done)
501 | 
502 |     def sample_transition(self, R, dt=0):
503 |         if self.alpha < 0:
504 |             self._add_R_Adv(R)
505 |         else:
506 |             self._add_s_R_Adv(R)
507 |         obs = np.array(self.obs, dtype=np.float32)
508 |         nas = np.array(self.adds, dtype=np.int32)
509 |         acts = np.array(self.acts, dtype=np.int32)
510 |         Rs = np.array(self.Rs, dtype=np.float32)
511 |         Advs = np.array(self.Advs, dtype=np.float32)
512 |         # use pre-step dones here
513 |         dones = np.array(self.dones[:-1], dtype=np.bool)
514 |         self.reset(self.dones[-1])
515 |         return obs, nas, acts, dones, Rs, Advs
516 | 
517 |     def _add_R_Adv(self, R):
518 |         Rs = []
519 |         Advs = []
520 |         # use post-step dones here
521 |         for r, v, done in zip(self.rs[::-1], self.vs[::-1], self.dones[:0:-1]):
522 |             R = r + self.gamma * R * (1.-done)
523 |             Adv = R - v
524 |             Rs.append(R)
525 |             Advs.append(Adv)
526 |         Rs.reverse()
527 |         Advs.reverse()
528 |         self.Rs = Rs
529 |         self.Advs = Advs
530 | 
531 |     def _add_st_R_Adv(self, R, dt):
532 |         Rs = []
533 |         Advs = []
534 |         # use post-step dones here
535 |         tdiff = dt
536 |         for r, v, done in zip(self.rs[::-1], self.vs[::-1], self.dones[:0:-1]):
537 |             R = self.gamma * R * (1.-done)
538 |             if done:
539 |                 tdiff = 0
540 |             # additional spatial rewards
541 |             tmax = min(tdiff, self.max_distance)
542 |             for t in range(tmax + 1):
543 |                 rt = np.sum(r[self.distance_mask == t])
544 |                 R += (self.gamma * self.alpha) ** t * rt
545 |             Adv = R - v
546 |             tdiff += 1
547 |             Rs.append(R)
548 |             Advs.append(Adv)
549 |         Rs.reverse()
550 |         Advs.reverse()
551 |         self.Rs = Rs
552 |         self.Advs = Advs
553 | 
554 |     def _add_s_R_Adv(self, R):
555 |         Rs = []
556 |         Advs = []
557 |         # use post-step dones here
558 |         for r, v, done in zip(self.rs[::-1], self.vs[::-1], self.dones[:0:-1]):
559 |             R = self.gamma * R * (1.-done)
560 |             # additional spatial rewards
561 |             for t in range(self.max_distance + 1):
562 |                 rt = np.sum(r[self.distance_mask == t])
563 |                 R += (self.alpha ** t) * rt
564 |             Adv = R - v
565 |             Rs.append(R)
566 |             Advs.append(Adv)
567 |         Rs.reverse()
568 |         Advs.reverse()
569 |         self.Rs = Rs
570 |         self.Advs = Advs
571 | 
572 | 
573 | class MultiAgentOnPolicyBuffer(OnPolicyBuffer):
574 |     def __init__(self, gamma, alpha, distance_mask):
575 |         super().__init__(gamma, alpha, distance_mask)
576 | 
577 |     def sample_transition(self, R, dt=0):
578 |         if self.alpha < 0:
579 |             self._add_R_Adv(R)
580 |         else:
581 |             self._add_s_R_Adv(R)
582 |         obs = np.transpose(np.array(self.obs, dtype=np.float32), (1, 0, 2))
583 |         policies = np.transpose(np.array(self.adds, dtype=np.float32), (1, 0, 2))
584 |         acts = np.transpose(np.array(self.acts, dtype=np.int32))
585 |         Rs = np.array(self.Rs, dtype=np.float32)
586 |         Advs = np.array(self.Advs, dtype=np.float32)
587 |         dones = np.array(self.dones[:-1], dtype=np.bool)
588 |         self.reset(self.dones[-1])
589 |         return obs, policies, acts, dones, Rs, Advs
590 | 
591 |     def _add_R_Adv(self, R):
592 |         Rs = []
593 |         Advs = []
594 |         vs = np.array(self.vs)
595 |         for i in range(vs.shape[1]):
596 |             cur_Rs = []
597 |             cur_Advs = []
598 |             cur_R = R[i]
599 |             for r, v, done in zip(self.rs[::-1], vs[::-1,i], self.dones[:0:-1]):
600 |                 cur_R = r + self.gamma * cur_R * (1.-done)
601 |                 cur_Adv = cur_R - v
602 |                 cur_Rs.append(cur_R)
603 |                 cur_Advs.append(cur_Adv)
604 |             cur_Rs.reverse()
605 |             cur_Advs.reverse()
606 |             Rs.append(cur_Rs)
607 |             Advs.append(cur_Advs)
608 |         self.Rs = np.array(Rs)
609 |         self.Advs = np.array(Advs)
610 | 
611 |     def _add_st_R_Adv(self, R, dt):
612 |         Rs = []
613 |         Advs = []
614 |         vs = np.array(self.vs)
615 |         for i in range(vs.shape[1]):
616 |             cur_Rs = []
617 |             cur_Advs = []
618 |             cur_R = R[i]
619 |             tdiff = dt
620 |             distance_mask = self.distance_mask[i]
621 |             max_distance = self.max_distance[i]
622 |             for r, v, done in zip(self.rs[::-1], vs[::-1,i], self.dones[:0:-1]):
623 |                 cur_R = self.gamma * cur_R * (1.-done)
624 |                 if done:
625 |                     tdiff = 0
626 |                 # additional spatial rewards
627 |                 tmax = min(tdiff, max_distance)
628 |                 for t in range(tmax + 1):
629 |                     rt = np.sum(r[distance_mask==t])
630 |                     cur_R += (self.gamma * self.alpha) ** t * rt
631 |                 cur_Adv = cur_R - v
632 |                 tdiff += 1
633 |                 cur_Rs.append(cur_R)
634 |                 cur_Advs.append(cur_Adv)
635 |             cur_Rs.reverse()
636 |             cur_Advs.reverse()
637 |             Rs.append(cur_Rs)
638 |             Advs.append(cur_Advs)
639 |         self.Rs = np.array(Rs)
640 |         self.Advs = np.array(Advs)
641 | 
642 |     def _add_s_R_Adv(self, R):
643 |         Rs = []
644 |         Advs = []
645 |         vs = np.array(self.vs)
646 |         for i in range(vs.shape[1]):
647 |             cur_Rs = []
648 |             cur_Advs = []
649 |             cur_R = R[i]
650 |             distance_mask = self.distance_mask[i]
651 |             max_distance = self.max_distance[i]
652 |             for r, v, done in zip(self.rs[::-1], vs[::-1,i], self.dones[:0:-1]):
653 |                 cur_R = self.gamma * cur_R * (1.-done)
654 |                 # additional spatial rewards
655 |                 for t in range(max_distance + 1):
656 |                     rt = np.sum(r[distance_mask==t])
657 |                     cur_R += (self.alpha ** t) * rt
658 |                 cur_Adv = cur_R - v
659 |                 cur_Rs.append(cur_R)
660 |                 cur_Advs.append(cur_Adv)
661 |             cur_Rs.reverse()
662 |             cur_Advs.reverse()
663 |             Rs.append(cur_Rs)
664 |             Advs.append(cur_Advs)
665 |         self.Rs = np.array(Rs)
666 |         self.Advs = np.array(Advs)
667 | 
668 | """
669 | util functions
670 | """
671 | class Scheduler:
672 |     def __init__(self, val_init, val_min=0, total_step=0, decay='linear'):
673 |         self.val = val_init
674 |         self.N = float(total_step)
675 |         self.val_min = val_min
676 |         self.decay = decay
677 |         self.n = 0
678 | 
679 |     def get(self, n_step):
680 |         self.n += n_step
681 |         if self.decay == 'linear':
682 |             return max(self.val_min, self.val * (1 - self.n / self.N))
683 |         else:
684 |             return self.val
685 | 
686 | 


--------------------------------------------------------------------------------
/config/config_greedy.ini:
--------------------------------------------------------------------------------
 1 | [ENV_CONFIG]
 2 | clip_wave = -1.0
 3 | clip_wait = -1.0
 4 | control_interval_sec = 5
 5 | ; agent is greedy, ia2c, ia2c_fp, ma2c_som, ma2c_ic3, ma2c_nc.
 6 | agent = greedy
 7 | ; coop discount is used to discount the neighbors' impact
 8 | coop_gamma = 0.75
 9 | data_path = ./envs/data/
10 | episode_length_sec = 3600
11 | ; the normailization is based on typical values in sim
12 | norm_wave = 1.0
13 | norm_wait = 1.0
14 | coef_wait = 0.2
15 | peak_flow1 = 1100
16 | peak_flow2 = 925
17 | init_density = 0
18 | ; objective is chosen from queue, wait, hybrid
19 | objective = queue
20 | scenario = large_grid
21 | seed = 12
22 | test_seeds = 10000,20000,30000,40000,50000,60000,70000,80000,90000,100000
23 | yellow_interval_sec = 2
24 | 


--------------------------------------------------------------------------------
/config/config_ia2c.ini:
--------------------------------------------------------------------------------
 1 | [MODEL_CONFIG]
 2 | rmsp_alpha = 0.99
 3 | rmsp_epsilon = 1e-5
 4 | max_grad_norm = 40
 5 | gamma = 0.99
 6 | lr_init = 5e-4
 7 | lr_decay = constant
 8 | entropy_coef = 0.01
 9 | value_coef = 0.5
10 | num_lstm = 64
11 | num_fc = 64
12 | batch_size = 120
13 | reward_norm = 100.0
14 | reward_clip = -1
15 | 
16 | [TRAIN_CONFIG]
17 | total_step = 1e6
18 | test_interval = 2e6
19 | log_interval = 1e4
20 | 
21 | [ENV_CONFIG]
22 | clip_wave = 2.0
23 | clip_wait = -1
24 | control_interval_sec = 5
25 | ; agent is greedy, ia2c, ia2c_fp, ma2c_som, ma2c_ic3, ma2c_nc.
26 | agent = ia2c
27 | ; coop discount is used to discount the neighbors' impact
28 | coop_gamma = 0.9
29 | data_path = ./envs/data/
30 | episode_length_sec = 3600
31 | ; the normailization is based on typical values in sim
32 | norm_wave = 5.0
33 | norm_wait = -1
34 | coef_wait = 0
35 | peak_flow1 = 1100
36 | peak_flow2 = 925
37 | init_density = 0
38 | ; objective is chosen from queue, wait, hybrid
39 | objective = queue
40 | scenario = large_grid
41 | seed = 12
42 | test_seeds = 10000,20000,30000,40000,50000,60000,70000,80000,90000,100000
43 | yellow_interval_sec = 2
44 | 


--------------------------------------------------------------------------------
/config/config_ia2c_cu.ini:
--------------------------------------------------------------------------------
 1 | [MODEL_CONFIG]
 2 | rmsp_alpha = 0.99
 3 | rmsp_epsilon = 1e-5
 4 | max_grad_norm = 40
 5 | gamma = 0.99
 6 | lr_init = 5e-4
 7 | lr_decay = constant
 8 | entropy_coef = 0.01
 9 | value_coef = 0.5
10 | num_lstm = 64
11 | num_fc = 64
12 | batch_size = 120
13 | reward_norm = 100.0
14 | reward_clip = -1
15 | 
16 | [TRAIN_CONFIG]
17 | total_step = 1e6
18 | test_interval = 2e6
19 | log_interval = 1e4
20 | 
21 | [ENV_CONFIG]
22 | clip_wave = 2.0
23 | clip_wait = -1
24 | control_interval_sec = 5
25 | ; agent is greedy, ia2c, ia2c_fp, ma2c_som, ma2c_ic3, ma2c_nc.
26 | agent = ma2c_cu
27 | ; coop discount is used to discount the neighbors' impact
28 | coop_gamma = 0.9
29 | data_path = ./envs/data/
30 | episode_length_sec = 3600
31 | ; the normailization is based on typical values in sim
32 | norm_wave = 5.0
33 | norm_wait = -1
34 | coef_wait = 0
35 | peak_flow1 = 1100
36 | peak_flow2 = 925
37 | init_density = 0
38 | ; objective is chosen from queue, wait, hybrid
39 | objective = queue
40 | scenario = large_grid
41 | seed = 12
42 | test_seeds = 10000,20000,30000,40000,50000,60000,70000,80000,90000,100000
43 | yellow_interval_sec = 2
44 | 


--------------------------------------------------------------------------------
/config/config_ia2c_fp.ini:
--------------------------------------------------------------------------------
 1 | [MODEL_CONFIG]
 2 | rmsp_alpha = 0.99
 3 | rmsp_epsilon = 1e-5
 4 | max_grad_norm = 40
 5 | gamma = 0.99
 6 | lr_init = 5e-4
 7 | lr_decay = constant
 8 | entropy_coef = 0.01
 9 | value_coef = 0.5
10 | num_lstm = 64
11 | num_fc = 64
12 | batch_size = 120
13 | reward_norm = 100.0
14 | reward_clip = -1
15 | 
16 | [TRAIN_CONFIG]
17 | total_step = 1e6
18 | test_interval = 2e6
19 | log_interval = 1e4
20 | 
21 | [ENV_CONFIG]
22 | clip_wave = 2.0
23 | clip_wait = -1
24 | control_interval_sec = 5
25 | ; agent is greedy, ia2c, ia2c_fp, ma2c_som, ma2c_ic3, ma2c_nc.
26 | agent = ia2c_fp
27 | ; coop discount is used to discount the neighbors' impact
28 | coop_gamma = 0.9
29 | data_path = ./envs/data/
30 | episode_length_sec = 3600
31 | ; the normailization is based on typical values in sim
32 | norm_wave = 5.0
33 | norm_wait = -1
34 | coef_wait = 0
35 | peak_flow1 = 1100
36 | peak_flow2 = 925
37 | init_density = 0
38 | ; objective is chosen from queue, wait, hybrid
39 | objective = queue
40 | scenario = large_grid
41 | seed = 12
42 | test_seeds = 10000,20000,30000,40000,50000,60000,70000,80000,90000,100000
43 | yellow_interval_sec = 2
44 | 


--------------------------------------------------------------------------------
/config/config_ma2c_dial.ini:
--------------------------------------------------------------------------------
 1 | [MODEL_CONFIG]
 2 | rmsp_alpha = 0.99
 3 | rmsp_epsilon = 1e-5
 4 | max_grad_norm = 40
 5 | gamma = 0.99
 6 | lr_init = 5e-4
 7 | lr_decay = constant
 8 | entropy_coef = 0.01
 9 | value_coef = 0.5
10 | num_lstm = 64
11 | num_fc = 64
12 | batch_size = 120
13 | reward_norm = 2000.0
14 | reward_clip = -1
15 | 
16 | [TRAIN_CONFIG]
17 | total_step = 1e6
18 | test_interval = 2e6
19 | log_interval = 1e4
20 | 
21 | [ENV_CONFIG]
22 | clip_wave = 2.0
23 | clip_wait = -1
24 | control_interval_sec = 5
25 | ; agent is greedy, ia2c, ia2c_fp, ma2c_som, ma2c_ic3, ma2c_nc.
26 | agent = ma2c_dial
27 | ; coop discount is used to discount the neighbors' impact
28 | coop_gamma = -1
29 | data_path = ./envs/data/
30 | episode_length_sec = 3600
31 | ; the normailization is based on typical values in sim
32 | norm_wave = 5.0
33 | norm_wait = -1
34 | coef_wait = 0
35 | peak_flow1 = 1100
36 | peak_flow2 = 925
37 | init_density = 0
38 | ; objective is chosen from queue, wait, hybrid
39 | objective = queue
40 | scenario = large_grid
41 | seed = 12
42 | test_seeds = 10000
43 | yellow_interval_sec = 2
44 | 


--------------------------------------------------------------------------------
/config/config_ma2c_ic3.ini:
--------------------------------------------------------------------------------
 1 | [MODEL_CONFIG]
 2 | rmsp_alpha = 0.99
 3 | rmsp_epsilon = 1e-5
 4 | max_grad_norm = 40
 5 | gamma = 0.99
 6 | lr_init = 5e-4
 7 | lr_decay = constant
 8 | entropy_coef = 0.01
 9 | value_coef = 0.5
10 | num_lstm = 64
11 | num_fc = 64
12 | batch_size = 120
13 | reward_norm = 2000.0
14 | reward_clip = -1
15 | 
16 | [TRAIN_CONFIG]
17 | total_step = 1e6
18 | test_interval = 2e6
19 | log_interval = 1e4
20 | 
21 | [ENV_CONFIG]
22 | clip_wave = 2.0
23 | clip_wait = -1
24 | control_interval_sec = 5
25 | ; agent is greedy, ia2c, ia2c_fp, ma2c_som, ma2c_ic3, ma2c_nc.
26 | agent = ma2c_ic3
27 | ; coop discount is used to discount the neighbors' impact
28 | coop_gamma = -1
29 | data_path = ./envs/data/
30 | episode_length_sec = 3600
31 | ; the normailization is based on typical values in sim
32 | norm_wave = 5.0
33 | norm_wait = -1
34 | coef_wait = 0
35 | peak_flow1 = 1100
36 | peak_flow2 = 925
37 | init_density = 0
38 | ; objective is chosen from queue, wait, hybrid
39 | objective = queue
40 | scenario = large_grid
41 | seed = 12
42 | test_seeds = 10000
43 | yellow_interval_sec = 2
44 | 


--------------------------------------------------------------------------------
/config/config_ma2c_nc.ini:
--------------------------------------------------------------------------------
 1 | [MODEL_CONFIG]
 2 | rmsp_alpha = 0.99
 3 | rmsp_epsilon = 1e-5
 4 | max_grad_norm = 40
 5 | gamma = 0.99
 6 | lr_init = 5e-4
 7 | lr_decay = constant
 8 | entropy_coef = 0.01
 9 | value_coef = 0.5
10 | num_lstm = 64
11 | num_fc = 64
12 | batch_size = 120
13 | reward_norm = 2000.0
14 | reward_clip = -1
15 | 
16 | [TRAIN_CONFIG]
17 | total_step = 1e6
18 | test_interval = 2e6
19 | log_interval = 1e4
20 | 
21 | [ENV_CONFIG]
22 | clip_wave = 2.0
23 | clip_wait = -1
24 | control_interval_sec = 5
25 | ; agent is greedy, ia2c, ia2c_fp, ma2c_som, ma2c_ic3, ma2c_nc.
26 | agent = ma2c_nc
27 | ; coop discount is used to discount the neighbors' impact
28 | coop_gamma = -1
29 | data_path = ./envs/data/
30 | episode_length_sec = 3600
31 | ; the normailization is based on typical values in sim
32 | norm_wave = 5.0
33 | norm_wait = -1
34 | coef_wait = 0
35 | peak_flow1 = 1100
36 | peak_flow2 = 925
37 | init_density = 0
38 | ; objective is chosen from queue, wait, hybrid
39 | objective = queue
40 | scenario = large_grid
41 | seed = 12
42 | test_seeds = 10000
43 | yellow_interval_sec = 2
44 | 


--------------------------------------------------------------------------------
/envs/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/envs/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/envs/__pycache__/env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/envs/__pycache__/env.cpython-37.pyc


--------------------------------------------------------------------------------
/envs/__pycache__/large_grid_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/envs/__pycache__/large_grid_env.cpython-37.pyc


--------------------------------------------------------------------------------
/envs/data/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/envs/data/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/envs/data/__pycache__/build_file.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/envs/data/__pycache__/build_file.cpython-37.pyc


--------------------------------------------------------------------------------
/envs/data/build_file.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | build *.xml files for a large 5 x 5 network
  4 | w/ the traffic dynamics modified from the following paper:
  5 | 
  6 | Chu, Tianshu, Shuhui Qu, and Jie Wang. "Large-scale traffic grid signal control with
  7 | regional reinforcement learning." American Control Conference (ACC), 2016. IEEE, 2016.
  8 | 
  9 | @author: Tianshu Chu
 10 | """
 11 | import numpy as np
 12 | import os
 13 | 
 14 | MAX_CAR_NUM = 30
 15 | SPEED_LIMIT_ST = 20
 16 | SPEED_LIMIT_AV = 11
 17 | L0 = 200
 18 | L0_end = 75
 19 | N = 5
 20 | 
 21 | 
 22 | def write_file(path, content):
 23 |     with open(path, 'w') as f:
 24 |         f.write(content)
 25 | 
 26 | 
 27 | def output_nodes(node):
 28 |     str_nodes = '<nodes>\n'
 29 |     # traffic light nodes
 30 |     ind = 1
 31 |     for dy in np.arange(0, L0 * 5, L0):
 32 |         for dx in np.arange(0, L0 * 5, L0):
 33 |             str_nodes += node % ('nt' + str(ind), dx, dy, 'traffic_light')
 34 |             ind += 1
 35 |     # other nodes
 36 |     ind = 1
 37 |     for dx in np.arange(0, L0 * 5, L0):
 38 |         str_nodes += node % ('np' + str(ind), dx, -L0_end, 'priority')
 39 |         ind += 1
 40 |     for dy in np.arange(0, L0 * 5, L0):
 41 |         str_nodes += node % ('np' + str(ind), L0 * 4 + L0_end, dy, 'priority')
 42 |         ind += 1
 43 |     for dx in np.arange(L0 * 4, -1, -L0):
 44 |         str_nodes += node % ('np' + str(ind), dx, L0 * 4 + L0_end, 'priority')
 45 |         ind += 1
 46 |     for dy in np.arange(L0 * 4, -1, -L0):
 47 |         str_nodes += node % ('np' + str(ind), -L0_end, dy, 'priority')
 48 |         ind += 1
 49 |     str_nodes += '</nodes>\n'
 50 |     return str_nodes
 51 | 
 52 | 
 53 | def output_road_types():
 54 |     str_types = '<types>\n'
 55 |     str_types += '  <type id="a" priority="2" numLanes="2" speed="%.2f"/>\n' % SPEED_LIMIT_ST
 56 |     str_types += '  <type id="b" priority="1" numLanes="1" speed="%.2f"/>\n' % SPEED_LIMIT_AV
 57 |     str_types += '</types>\n'
 58 |     return str_types
 59 | 
 60 | 
 61 | def get_edge_str(edge, from_node, to_node, edge_type):
 62 |     edge_id = '%s_%s' % (from_node, to_node)
 63 |     return edge % (edge_id, from_node, to_node, edge_type)
 64 | 
 65 | 
 66 | def output_edges(edge):
 67 |     str_edges = '<edges>\n'
 68 |     # external roads
 69 |     in_edges = [5, 10, 15, 20, 25, 21, 16, 11, 6, 1]
 70 |     out_edges = [6, 7, 8, 9, 10, 16, 17, 18, 19, 20]
 71 |     for in_i, out_i in zip(in_edges, out_edges):
 72 |         in_node = 'nt' + str(in_i)
 73 |         out_node = 'np' + str(out_i)
 74 |         str_edges += get_edge_str(edge, in_node, out_node, 'a')
 75 |         str_edges += get_edge_str(edge, out_node, in_node, 'a')
 76 | 
 77 |     in_edges = [1, 2, 3, 4, 5, 25, 24, 23, 22, 21]
 78 |     out_edges = [1, 2, 3, 4, 5, 11, 12, 13, 14, 15]
 79 |     for in_i, out_i in zip(in_edges, out_edges):
 80 |         in_node = 'nt' + str(in_i)
 81 |         out_node = 'np' + str(out_i)
 82 |         str_edges += get_edge_str(edge, in_node, out_node, 'b')
 83 |         str_edges += get_edge_str(edge, out_node, in_node, 'b')
 84 |     # internal roads
 85 |     for i in range(1, 25, 5):
 86 |         for j in range(4):
 87 |             from_node = 'nt' + str(i + j)
 88 |             to_node = 'nt' + str(i + j + 1)
 89 |             str_edges += get_edge_str(edge, from_node, to_node, 'a')
 90 |             str_edges += get_edge_str(edge, to_node, from_node, 'a')
 91 |     for i in range(1, 6):
 92 |         for j in range(0, 20, 5):
 93 |             from_node = 'nt' + str(i + j)
 94 |             to_node = 'nt' + str(i + j + 5)
 95 |             str_edges += get_edge_str(edge, from_node, to_node, 'b')
 96 |             str_edges += get_edge_str(edge, to_node, from_node, 'b')
 97 |     str_edges += '</edges>\n'
 98 |     return str_edges
 99 | 
100 | 
101 | def get_con_str(con, from_node, cur_node, to_node, from_lane, to_lane):
102 |     from_edge = '%s_%s' % (from_node, cur_node)
103 |     to_edge = '%s_%s' % (cur_node, to_node)
104 |     return con % (from_edge, to_edge, from_lane, to_lane)
105 | 
106 | 
107 | def get_con_str_set(con, cur_node, n_node, s_node, w_node, e_node):
108 |     str_cons = ''
109 |     # go-through
110 |     str_cons += get_con_str(con, s_node, cur_node, n_node, 0, 0)
111 |     str_cons += get_con_str(con, n_node, cur_node, s_node, 0, 0)
112 |     str_cons += get_con_str(con, w_node, cur_node, e_node, 0, 0)
113 |     str_cons += get_con_str(con, e_node, cur_node, w_node, 0, 0)
114 |     # left-turn
115 |     str_cons += get_con_str(con, s_node, cur_node, w_node, 0, 1)
116 |     str_cons += get_con_str(con, n_node, cur_node, e_node, 0, 1)
117 |     str_cons += get_con_str(con, w_node, cur_node, n_node, 1, 0)
118 |     str_cons += get_con_str(con, e_node, cur_node, s_node, 1, 0)
119 |     # right-turn
120 |     str_cons += get_con_str(con, s_node, cur_node, e_node, 0, 0)
121 |     str_cons += get_con_str(con, n_node, cur_node, w_node, 0, 0)
122 |     str_cons += get_con_str(con, w_node, cur_node, s_node, 0, 0)
123 |     str_cons += get_con_str(con, e_node, cur_node, n_node, 0, 0)
124 |     return str_cons
125 | 
126 | 
127 | def output_connections(con):
128 |     str_cons = '<connections>\n'
129 |     # edge nodes
130 |     in_edges = [5, 10, 15, 20, 25, 21, 16, 11, 6, 1]
131 |     out_edges = [6, 7, 8, 9, 10, 16, 17, 18, 19, 20]
132 |     for i, j in zip(in_edges, out_edges):
133 |         if i == 5:
134 |             s_node = 'np5'
135 |         elif i == 1:
136 |             s_node = 'np1'
137 |         else:
138 |             s_node = 'nt' + str(i - 5)
139 |         if i == 25:
140 |             n_node = 'np11'
141 |         elif i == 21:
142 |             n_node = 'np15'
143 |         else:
144 |             n_node = 'nt' + str(i + 5)
145 |         if i % 5 == 1:
146 |             w_node = 'np' + str(j)
147 |         else:
148 |             w_node = 'nt' + str(i - 1)
149 |         if i % 5 == 0:
150 |             e_node = 'np' + str(j)
151 |         else:
152 |             e_node = 'nt' + str(i + 1)
153 |         cur_node = 'nt' + str(i)
154 |         str_cons += get_con_str_set(con, cur_node, n_node, s_node, w_node, e_node)
155 | 
156 |     in_edges = [2, 3, 4, 24, 23, 22]
157 |     out_edges = [2, 3, 4, 12, 13, 14]
158 |     for i, j in zip(in_edges, out_edges):
159 |         w_node = 'nt' + str(i - 1)
160 |         e_node = 'nt' + str(i + 1)
161 |         if i <= 5:
162 |             s_node = 'np' + str(j)
163 |         else:
164 |             s_node = 'nt' + str(i - 5)
165 |         if i >= 20:
166 |             n_node = 'np' + str(j)
167 |         else:
168 |             n_node = 'nt' + str(i + 5)
169 |         cur_node = 'nt' + str(i)
170 |         str_cons += get_con_str_set(con, cur_node, n_node, s_node, w_node, e_node)
171 | 
172 |     # internal nodes
173 |     for i in [7, 8, 9, 12, 13, 14, 17, 18, 19]:
174 |         n_node = 'nt' + str(i + 5)
175 |         s_node = 'nt' + str(i - 5)
176 |         w_node = 'nt' + str(i - 1)
177 |         e_node = 'nt' + str(i + 1)
178 |         cur_node = 'nt' + str(i)
179 |         str_cons += get_con_str_set(con, cur_node, n_node, s_node, w_node, e_node)
180 | 
181 |     str_cons += '</connections>\n'
182 |     return str_cons
183 | 
184 | 
185 | def output_netconfig():
186 |     str_config = '<configuration>\n  <input>\n'
187 |     str_config += '    <edge-files value="exp.edg.xml"/>\n'
188 |     str_config += '    <node-files value="exp.nod.xml"/>\n'
189 |     str_config += '    <type-files value="exp.typ.xml"/>\n'
190 |     str_config += '    <tllogic-files value="exp.tll.xml"/>\n'
191 |     str_config += '    <connection-files value="exp.con.xml"/>\n'
192 |     str_config += '  </input>\n  <output>\n'
193 |     str_config += '    <output-file value="exp.net.xml"/>\n'
194 |     str_config += '  </output>\n</configuration>\n'
195 |     return str_config
196 | 
197 | 
198 | def get_external_od(out_edges, dest=True):
199 |     edge_maps = [0, 1, 2, 3, 4, 5, 5, 10, 15, 20, 25,
200 |                  25, 24, 23, 22, 21, 21, 16, 11, 6, 1]
201 |     cur_dest = []
202 |     for out_edge in out_edges:
203 |         in_edge = edge_maps[out_edge]
204 |         in_node = 'nt' + str(in_edge)
205 |         out_node = 'np' + str(out_edge)
206 |         if dest:
207 |             edge = '%s_%s' % (in_node, out_node)
208 |         else:
209 |             edge = '%s_%s' % (out_node, in_node)
210 |         cur_dest.append(edge)
211 |     return cur_dest
212 | 
213 | 
214 | def sample_od_pair(orig_edges, dest_edges):
215 |     from_edges = []
216 |     to_edges = []
217 |     for i in range(len(orig_edges)):
218 |         from_edges.append(np.random.choice(orig_edges[i]))
219 |         to_edges.append(np.random.choice(dest_edges))
220 |     return from_edges, to_edges
221 | 
222 | 
223 | def init_routes(density):
224 |     init_flow = '  <flow id="i%s" departPos="random_free" from="%s" to="%s" begin="0" end="1" departLane="%d" departSpeed="0" number="%d" type="type1"/>\n'
225 |     output = ''
226 |     in_nodes = [5, 10, 15, 20, 25, 21, 16, 11, 6, 1,
227 |                 1, 2, 3, 4, 5, 25, 24, 23, 22, 21]
228 |     out_nodes = [6, 7, 8, 9, 10, 16, 17, 18, 19, 20,
229 |                  1, 2, 3, 4, 5, 11, 12, 13, 14, 15]
230 |     # external edges
231 |     sink_edges = []
232 |     for i, j in zip(in_nodes, out_nodes):
233 |         node1 = 'nt' + str(i)
234 |         node2 = 'np' + str(j)
235 |         sink_edges.append('%s_%s' % (node1, node2))
236 | 
237 |     def get_od(node1, node2, k, lane=0):
238 |         source_edge = '%s_%s' % (node1, node2)
239 |         sink_edge = np.random.choice(sink_edges)
240 |         return init_flow % (str(k), source_edge, sink_edge, lane, car_num)
241 | 
242 |     # streets
243 |     k = 1
244 |     car_num = int(MAX_CAR_NUM * density)
245 |     for i in range(1, 25, 5):
246 |         for j in range(4):
247 |             node1 = 'nt' + str(i + j)
248 |             node2 = 'nt' + str(i + j + 1)
249 |             output += get_od(node1, node2, k)
250 |             k += 1
251 |             output += get_od(node2, node1, k)
252 |             k += 1
253 |             output += get_od(node1, node2, k, lane=1)
254 |             k += 1
255 |             output += get_od(node2, node1, k, lane=1)
256 |             k += 1
257 |     # avenues
258 |     for i in range(1, 6):
259 |         for j in range(0, 20, 5):
260 |             node1 = 'nt' + str(i + j)
261 |             node2 = 'nt' + str(i + j + 5)
262 |             output += get_od(node1, node2, k)
263 |             k += 1
264 |             output += get_od(node2, node1, k)
265 |             k += 1
266 |     return output
267 | 
268 | def output_flows(peak_flow1, peak_flow2, density, seed=None):
269 |     '''
270 |     flow1: x11, x12, x13, x14, x15 -> x1, x2, x3, x4, x5
271 |     flow2: x16, x17, x18, x19, x20 -> x6, x7, x8, x9, x10
272 |     flow3: x1, x2, x3, x4, x5 -> x15, x14, x13, x12, x11
273 |     flow4: x6, x7, x8, x9, x10 -> x20, x19, x18, x17, x16
274 |     '''
275 |     if seed is not None:
276 |         np.random.seed(seed)
277 |     ext_flow = '  <flow id="f%s" departPos="random_free" from="%s" to="%s" begin="%d" end="%d" vehsPerHour="%d" type="type1"/>\n'
278 |     str_flows = '<routes>\n'
279 |     str_flows += '  <vType id="type1" length="5" accel="5" decel="10"/>\n'
280 |     # initial traffic dist
281 |     if density > 0:
282 |         str_flows += init_routes(density)
283 | 
284 |     # create external origins and destinations for flows
285 |     srcs = []
286 |     srcs.append(get_external_od([12, 13, 14], dest=False))
287 |     srcs.append(get_external_od([16, 18, 20], dest=False))
288 |     srcs.append(get_external_od([2, 3, 4], dest=False))
289 |     srcs.append(get_external_od([6, 8, 10], dest=False))
290 | 
291 |     sinks = []
292 |     sinks.append(get_external_od([2, 3, 4]))
293 |     sinks.append(get_external_od([6, 8, 10]))
294 |     sinks.append(get_external_od([14, 13, 12]))
295 |     sinks.append(get_external_od([20, 18, 16]))
296 | 
297 |     # create volumes per 5 min for flows
298 |     ratios1 = np.array([0.4, 0.7, 0.9, 1.0, 0.75, 0.5, 0.25]) # start from 0
299 |     ratios2 = np.array([0.3, 0.8, 0.9, 1.0, 0.8, 0.6, 0.2])   # start from 15min
300 |     flows1 = peak_flow1 * 0.6 * ratios1
301 |     flows2 = peak_flow1 * ratios1
302 |     flows3 = peak_flow2 * 0.6 * ratios2
303 |     flows4 = peak_flow2 * ratios2
304 |     flows = [flows1, flows2, flows3, flows4]
305 |     times = np.arange(0, 3001, 300)
306 |     id1 = len(flows1)
307 |     id2 = len(times) - 1 - id1
308 |     for i in range(len(times) - 1):
309 |         name = str(i)
310 |         t_begin, t_end = times[i], times[i + 1]
311 |         # external flow
312 |         k = 0
313 |         if i < id1:
314 |             for j in [0, 1]:
315 |                 for e1, e2 in zip(srcs[j], sinks[j]):
316 |                     cur_name = name + '_' + str(k)
317 |                     str_flows += ext_flow % (cur_name, e1, e2, t_begin, t_end, flows[j][i])
318 |                     k += 1
319 |         if i >= id2:
320 |             for j in [2, 3]:
321 |                 for e1, e2 in zip(srcs[j], sinks[j]):
322 |                     cur_name = name + '_' + str(k)
323 |                     str_flows += ext_flow % (cur_name, e1, e2, t_begin, t_end, flows[j][i - id2])
324 |                     k += 1
325 |     str_flows += '</routes>\n'
326 |     return str_flows
327 | 
328 | 
329 | def gen_rou_file(path, peak_flow1, peak_flow2, density, seed=None, thread=None):
330 |     if thread is None:
331 |         flow_file = 'exp.rou.xml'
332 |     else:
333 |         flow_file = 'exp_%d.rou.xml' % int(thread)
334 |     write_file(path + flow_file, output_flows(peak_flow1, peak_flow2, density, seed=seed))
335 |     sumocfg_file = path + ('exp_%d.sumocfg' % thread)
336 |     write_file(sumocfg_file, output_config(thread=thread))
337 |     return sumocfg_file
338 | 
339 | 
340 | def output_config(thread=None):
341 |     if thread is None:
342 |         out_file = 'exp.rou.xml'
343 |     else:
344 |         out_file = 'exp_%d.rou.xml' % int(thread)
345 |     str_config = '<configuration>\n  <input>\n'
346 |     str_config += '    <net-file value="exp.net.xml"/>\n'
347 |     str_config += '    <route-files value="%s"/>\n' % out_file
348 |     str_config += '    <additional-files value="exp.add.xml"/>\n'
349 |     str_config += '  </input>\n  <time>\n'
350 |     str_config += '    <begin value="0"/>\n    <end value="3600"/>\n'
351 |     str_config += '  </time>\n</configuration>\n'
352 |     return str_config
353 | 
354 | 
355 | def get_ild_str(from_node, to_node, ild_str, lane_i=0):
356 |     edge = '%s_%s' % (from_node, to_node)
357 |     return ild_str % (edge, lane_i, edge, lane_i)
358 | 
359 | 
360 | def output_ild(ild):
361 |     str_adds = '<additional>\n'
362 |     in_edges = [5, 10, 15, 20, 25, 21, 16, 11, 6, 1,
363 |                 1, 2, 3, 4, 5, 25, 24, 23, 22, 21]
364 |     out_edges = [6, 7, 8, 9, 10, 16, 17, 18, 19, 20,
365 |                  1, 2, 3, 4, 5, 11, 12, 13, 14, 15]
366 |     # external edges
367 |     for k, (i, j) in enumerate(zip(in_edges, out_edges)):
368 |         node1 = 'nt' + str(i)
369 |         node2 = 'np' + str(j)
370 |         str_adds += get_ild_str(node2, node1, ild)
371 |         if k < 10:
372 |             # streets
373 |             str_adds += get_ild_str(node2, node1, ild, lane_i=1)
374 |     # streets
375 |     for i in range(1, 25, 5):
376 |         for j in range(4):
377 |             node1 = 'nt' + str(i + j)
378 |             node2 = 'nt' + str(i + j + 1)
379 |             str_adds += get_ild_str(node1, node2, ild)
380 |             str_adds += get_ild_str(node2, node1, ild)
381 |             str_adds += get_ild_str(node1, node2, ild, lane_i=1)
382 |             str_adds += get_ild_str(node2, node1, ild, lane_i=1)
383 |     # avenues
384 |     for i in range(1, 6):
385 |         for j in range(0, 20, 5):
386 |             node1 = 'nt' + str(i + j)
387 |             node2 = 'nt' + str(i + j + 5)
388 |             str_adds += get_ild_str(node1, node2, ild)
389 |             str_adds += get_ild_str(node2, node1, ild)
390 |     str_adds += '</additional>\n'
391 |     return str_adds
392 | 
393 | 
394 | def output_tls(tls, phase):
395 |     str_adds = '<additional>\n'
396 |     # all crosses have 3 phases
397 |     three_phases = ['GGgrrrGGgrrr', 'yyyrrryyyrrr',
398 |                     'rrrGrGrrrGrG', 'rrrGryrrrGry',
399 |                     'rrrGGrrrrGGr', 'rrryyrrrryyr']
400 |     phase_duration = [30, 3]
401 |     for i in range(1, 26):
402 |         node = 'nt' + str(i)
403 |         str_adds += tls % node
404 |         for k, p in enumerate(three_phases):
405 |             str_adds += phase % (phase_duration[k % 2], p)
406 |         str_adds += '  </tlLogic>\n'
407 |     str_adds += '</additional>\n'
408 |     return str_adds
409 | 
410 | 
411 | def main():
412 |     # nod.xml file
413 |     node = '  <node id="%s" x="%.2f" y="%.2f" type="%s"/>\n'
414 |     write_file('./exp.nod.xml', output_nodes(node))
415 | 
416 |     # typ.xml file
417 |     write_file('./exp.typ.xml', output_road_types())
418 | 
419 |     # edg.xml file
420 |     edge = '  <edge id="%s" from="%s" to="%s" type="%s"/>\n'
421 |     write_file('./exp.edg.xml', output_edges(edge))
422 | 
423 |     # con.xml file
424 |     con = '  <connection from="%s" to="%s" fromLane="%d" toLane="%d"/>\n'
425 |     write_file('./exp.con.xml', output_connections(con))
426 | 
427 |     # tls.xml file
428 |     tls = '  <tlLogic id="%s" programID="0" offset="0" type="static">\n'
429 |     phase = '    <phase duration="%d" state="%s"/>\n'
430 |     write_file('./exp.tll.xml', output_tls(tls, phase))
431 | 
432 |     # net config file
433 |     write_file('./exp.netccfg', output_netconfig())
434 | 
435 |     # generate net.xml file
436 |     os.system('netconvert -c exp.netccfg')
437 | 
438 |     # raw.rou.xml file
439 |     write_file('./exp.rou.xml', output_flows(1000, 2000, 0.2))
440 | 
441 |     # generate rou.xml file
442 |     # os.system('jtrrouter -n exp.net.xml -r exp.raw.rou.xml -o exp.rou.xml')
443 | 
444 |     # add.xml file
445 |     ild = '  <laneAreaDetector file="ild.out" freq="1" id="%s_%d" lane="%s_%d" pos="-50" endPos="-1"/>\n'
446 |     # ild_in = '  <inductionLoop file="ild_out.out" freq="15" id="ild_in:%s" lane="%s_0" pos="10"/>\n'
447 |     write_file('./exp.add.xml', output_ild(ild))
448 | 
449 |     # config file
450 |     write_file('./exp.sumocfg', output_config())
451 | 
452 | if __name__ == '__main__':
453 |     main()
454 | 


--------------------------------------------------------------------------------
/envs/data/intersection.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/envs/data/intersection.pdf


--------------------------------------------------------------------------------
/envs/data/network.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MUmarJaved/MultiAgent-Distributed-Reinforcement-Learning/d5a0d7226011f7050f643b66e51e141277dd0e58/envs/data/network.pdf


--------------------------------------------------------------------------------
/envs/data/view.xml:
--------------------------------------------------------------------------------
  1 | <viewsettings>
  2 |     <scheme name="custom_1">
  3 |         <opengl dither="0"/>
  4 |         <background backgroundColor="white" showGrid="0" gridXSize="100.00" gridYSize="100.00"/>
  5 |         <edges laneEdgeMode="4" scaleMode="5" laneShowBorders="1" showLinkDecals="1" showLinkRules="1" showRails="1" hideConnectors="0" widthExaggeration="1.00" minSize="0.00" showDirection="0" showSublanes="1"
  6 |                 edgeName_show="0" edgeName_size="50.00" edgeName_color="orange"
  7 |                 internalEdgeName_show="0" internalEdgeName_size="40.00" internalEdgeName_color="128,64,0"
  8 |                 cwaEdgeName_show="0" cwaEdgeName_size="50.00" cwaEdgeName_color="magenta"
  9 |                 streetName_show="0" streetName_size="55.00" streetName_color="yellow">
 10 |             <colorScheme name="uniform">
 11 |                 <entry color="black" name="road"/>
 12 |                 <entry color="grey" name="sidewalk"/>
 13 |                 <entry color="192,66,44" name="bike lane"/>
 14 |                 <entry color="0,0,0,0" name="green verge"/>
 15 |                 <entry color="150,200,200" name="waterway"/>
 16 |                 <entry color="92,92,92" name="no passenger"/>
 17 |                 <entry color="red" name="closed"/>
 18 |                 <entry color="green" name="connector"/>
 19 |             </colorScheme>
 20 |             <colorScheme name="by selection (lane-/streetwise)">
 21 |                 <entry color="grey" name="unselected"/>
 22 |                 <entry color="0,80,180" name="selected"/>
 23 |             </colorScheme>
 24 |             <colorScheme name="by permission code" interpolated="1">
 25 |                 <entry color="240,240,240" threshold="0.00"/>
 26 |                 <entry color="10,10,10" threshold="32.00"/>
 27 |                 <entry color="166,147,26" threshold="128.00"/>
 28 |                 <entry color="40,100,40" threshold="256.00"/>
 29 |                 <entry color="192,66,44" threshold="524288.00"/>
 30 |                 <entry color="grey" threshold="1048576.00"/>
 31 |                 <entry color="80,80,80" threshold="1049600.00"/>
 32 |                 <entry color="150,200,200" threshold="4194304.00"/>
 33 |                 <entry color="255,206,0" threshold="27402239.00"/>
 34 |                 <entry color="black" threshold="28188671.00"/>
 35 |                 <entry color="black" threshold="29237247.00"/>
 36 |                 <entry color="green" threshold="33554431.00"/>
 37 |             </colorScheme>
 38 |             <colorScheme name="by allowed speed (lanewise)" interpolated="1">
 39 |                 <entry color="red" threshold="0.00"/>
 40 |                 <entry color="yellow" threshold="8.33"/>
 41 |                 <entry color="green" threshold="15.28"/>
 42 |                 <entry color="cyan" threshold="22.22"/>
 43 |                 <entry color="blue" threshold="33.33"/>
 44 |                 <entry color="magenta" threshold="41.67"/>
 45 |             </colorScheme>
 46 |             <colorScheme name="by current occupancy (lanewise, brutto)" interpolated="1">
 47 |                 <entry color="235,235,235" threshold="0.00"/>
 48 |                 <entry color="green" threshold="0.25"/>
 49 |                 <entry color="yellow" threshold="0.50"/>
 50 |                 <entry color="orange" threshold="0.70"/>
 51 |                 <entry color="red" threshold="0.90"/>
 52 |             </colorScheme>
 53 |             <colorScheme name="by current occupancy (lanewise, netto)" interpolated="1">
 54 |                 <entry color="235,235,235" threshold="0.00"/>
 55 |                 <entry color="green" threshold="0.25"/>
 56 |                 <entry color="yellow" threshold="0.50"/>
 57 |                 <entry color="orange" threshold="0.70"/>
 58 |                 <entry color="red" threshold="0.85"/>
 59 |             </colorScheme>
 60 |             <colorScheme name="by first vehicle waiting time (lanewise)" interpolated="1">
 61 |                 <entry color="235,235,235" threshold="0.00"/>
 62 |                 <entry color="cyan" threshold="30.00"/>
 63 |                 <entry color="green" threshold="100.00"/>
 64 |                 <entry color="yellow" threshold="200.00"/>
 65 |                 <entry color="red" threshold="300.00"/>
 66 |             </colorScheme>
 67 |             <colorScheme name="by lane number (streetwise)" interpolated="1">
 68 |                 <entry color="red" threshold="0.00"/>
 69 |                 <entry color="blue" threshold="5.00"/>
 70 |             </colorScheme>
 71 |             <colorScheme name="by CO2 emissions" interpolated="1">
 72 |                 <entry color="green" threshold="0.00"/>
 73 |                 <entry color="red" threshold="0.27"/>
 74 |             </colorScheme>
 75 |             <colorScheme name="by CO emissions" interpolated="1">
 76 |                 <entry color="green" threshold="0.00"/>
 77 |                 <entry color="red" threshold="0.00"/>
 78 |             </colorScheme>
 79 |             <colorScheme name="by PMx emissions" interpolated="1">
 80 |                 <entry color="green" threshold="0.00"/>
 81 |                 <entry color="red" threshold="0.00"/>
 82 |             </colorScheme>
 83 |             <colorScheme name="by NOx emissions" interpolated="1">
 84 |                 <entry color="green" threshold="0.00"/>
 85 |                 <entry color="red" threshold="0.00"/>
 86 |             </colorScheme>
 87 |             <colorScheme name="by HC emissions" interpolated="1">
 88 |                 <entry color="green" threshold="0.00"/>
 89 |                 <entry color="red" threshold="0.00"/>
 90 |             </colorScheme>
 91 |             <colorScheme name="by fuel consumption" interpolated="1">
 92 |                 <entry color="green" threshold="0.00"/>
 93 |                 <entry color="red" threshold="0.07"/>
 94 |             </colorScheme>
 95 |             <colorScheme name="by noise emissions (Harmonoise)" interpolated="1">
 96 |                 <entry color="green" threshold="0.00"/>
 97 |                 <entry color="red" threshold="100.00"/>
 98 |             </colorScheme>
 99 |             <colorScheme name="by global travel time" interpolated="1">
100 |                 <entry color="green" threshold="0.00"/>
101 |                 <entry color="red" threshold="100.00"/>
102 |             </colorScheme>
103 |             <colorScheme name="by global speed percentage" interpolated="1">
104 |                 <entry color="red" threshold="0.00"/>
105 |                 <entry color="yellow" threshold="50.00"/>
106 |                 <entry color="green" threshold="100.00"/>
107 |             </colorScheme>
108 |             <colorScheme name="by given length/geometrical length" interpolated="1">
109 |                 <entry color="black" threshold="0.00"/>
110 |                 <entry color="red" threshold="0.25"/>
111 |                 <entry color="yellow" threshold="0.50"/>
112 |                 <entry color="179,179,179" threshold="1.00"/>
113 |                 <entry color="green" threshold="2.00"/>
114 |                 <entry color="blue" threshold="4.00"/>
115 |             </colorScheme>
116 |             <colorScheme name="by angle">
117 |                 <entry color="yellow"/>
118 |             </colorScheme>
119 |             <colorScheme name="by loaded weight" interpolated="1">
120 |                 <entry color="green" threshold="0.00"/>
121 |                 <entry color="red" threshold="100.00"/>
122 |             </colorScheme>
123 |             <colorScheme name="by priority" interpolated="1">
124 |                 <entry color="red" threshold="-20.00"/>
125 |                 <entry color="yellow" threshold="0.00"/>
126 |                 <entry color="green" threshold="20.00"/>
127 |             </colorScheme>
128 |             <colorScheme name="by height at start" interpolated="1">
129 |                 <entry color="blue" threshold="-10.00"/>
130 |                 <entry color="grey" threshold="0.00"/>
131 |                 <entry color="red" threshold="10.00"/>
132 |                 <entry color="yellow" threshold="50.00"/>
133 |                 <entry color="green" threshold="100.00"/>
134 |                 <entry color="magenta" threshold="200.00"/>
135 |             </colorScheme>
136 |             <colorScheme name="by height at geometry-segment start" interpolated="1">
137 |                 <entry color="blue" threshold="-10.00"/>
138 |                 <entry color="grey" threshold="0.00"/>
139 |                 <entry color="red" threshold="10.00"/>
140 |                 <entry color="yellow" threshold="50.00"/>
141 |                 <entry color="green" threshold="100.00"/>
142 |                 <entry color="magenta" threshold="200.00"/>
143 |             </colorScheme>
144 |             <colorScheme name="by inclination" interpolated="1">
145 |                 <entry color="blue" threshold="-0.30"/>
146 |                 <entry color="green" threshold="-0.10"/>
147 |                 <entry color="grey" threshold="0.00"/>
148 |                 <entry color="yellow" threshold="0.10"/>
149 |                 <entry color="red" threshold="0.30"/>
150 |             </colorScheme>
151 |             <colorScheme name="by geometry-segment inclination" interpolated="1">
152 |                 <entry color="blue" threshold="-0.30"/>
153 |                 <entry color="green" threshold="-0.10"/>
154 |                 <entry color="grey" threshold="0.00"/>
155 |                 <entry color="yellow" threshold="0.10"/>
156 |                 <entry color="red" threshold="0.30"/>
157 |             </colorScheme>
158 |             <colorScheme name="by average speed" interpolated="1">
159 |                 <entry color="red" threshold="0.00"/>
160 |                 <entry color="yellow" threshold="8.33"/>
161 |                 <entry color="green" threshold="15.28"/>
162 |                 <entry color="cyan" threshold="22.22"/>
163 |                 <entry color="blue" threshold="33.33"/>
164 |                 <entry color="magenta" threshold="41.67"/>
165 |             </colorScheme>
166 |             <colorScheme name="by average relative speed " interpolated="1">
167 |                 <entry color="red" threshold="0.00"/>
168 |                 <entry color="yellow" threshold="0.25"/>
169 |                 <entry color="green" threshold="0.50"/>
170 |                 <entry color="cyan" threshold="0.75"/>
171 |                 <entry color="blue" threshold="1.00"/>
172 |                 <entry color="magenta" threshold="1.25"/>
173 |             </colorScheme>
174 |             <colorScheme name="by routing device assumed speed " interpolated="1">
175 |                 <entry color="red" threshold="0.00"/>
176 |                 <entry color="yellow" threshold="8.33"/>
177 |                 <entry color="green" threshold="15.28"/>
178 |                 <entry color="cyan" threshold="22.22"/>
179 |                 <entry color="blue" threshold="33.33"/>
180 |                 <entry color="magenta" threshold="41.67"/>
181 |             </colorScheme>
182 |             <colorScheme name="by electricity consumption" interpolated="1">
183 |                 <entry color="green" threshold="0.00"/>
184 |                 <entry color="red" threshold="0.03"/>
185 |             </colorScheme>
186 |             <colorScheme name="by insertion-backlog (streetwise)" interpolated="1">
187 |                 <entry color="grey" threshold="0.00"/>
188 |                 <entry color="green" threshold="1.00"/>
189 |                 <entry color="yellow" threshold="10.00"/>
190 |                 <entry color="red" threshold="100.00"/>
191 |             </colorScheme>
192 |             <scalingScheme name="default">
193 |                 <entry color="1.00" name="uniform"/>
194 |             </scalingScheme>
195 |             <scalingScheme name="by selection (lane-/streetwise)">
196 |                 <entry color="0.50" name="unselected"/>
197 |                 <entry color="5.00" name="selected"/>
198 |             </scalingScheme>
199 |             <scalingScheme name="by allowed speed (lanewise)" interpolated="1">
200 |                 <entry color="0.00" threshold="0.00"/>
201 |                 <entry color="10.00" threshold="41.67"/>
202 |             </scalingScheme>
203 |             <scalingScheme name="by current occupancy (lanewise, brutto)" interpolated="1">
204 |                 <entry color="0.00" threshold="0.00"/>
205 |                 <entry color="10.00" threshold="0.95"/>
206 |             </scalingScheme>
207 |             <scalingScheme name="by current occupancy (lanewise, netto)" interpolated="1">
208 |                 <entry color="0.00" threshold="0.00"/>
209 |                 <entry color="10.00" threshold="0.95"/>
210 |             </scalingScheme>
211 |             <scalingScheme name="by first vehicle waiting time (lanewise)" interpolated="1">
212 |                 <entry color="1.00" threshold="0.00"/>
213 |                 <entry color="5.00" threshold="20.00"/>
214 |                 <entry color="10.00" threshold="60.00"/>
215 |                 <entry color="15.00" threshold="120.00"/>
216 |                 <entry color="20.00" threshold="300.00"/>
217 |             </scalingScheme>
218 |             <scalingScheme name="by lane number (streetwise)" interpolated="1">
219 |                 <entry color="1.00" threshold="0.00"/>
220 |                 <entry color="10.00" threshold="5.00"/>
221 |             </scalingScheme>
222 |             <scalingScheme name="by CO2 emissions" interpolated="1">
223 |                 <entry color="0.00" threshold="0.00"/>
224 |                 <entry color="10.00" threshold="0.27"/>
225 |             </scalingScheme>
226 |             <scalingScheme name="by CO emissions" interpolated="1">
227 |                 <entry color="0.00" threshold="0.00"/>
228 |                 <entry color="10.00" threshold="0.00"/>
229 |             </scalingScheme>
230 |             <scalingScheme name="by PMx emissions" interpolated="1">
231 |                 <entry color="0.00" threshold="0.00"/>
232 |                 <entry color="10.00" threshold="0.00"/>
233 |             </scalingScheme>
234 |             <scalingScheme name="by NOx emissions" interpolated="1">
235 |                 <entry color="0.00" threshold="0.00"/>
236 |                 <entry color="10.00" threshold="0.00"/>
237 |             </scalingScheme>
238 |             <scalingScheme name="by HC emissions" interpolated="1">
239 |                 <entry color="0.00" threshold="0.00"/>
240 |                 <entry color="10.00" threshold="0.00"/>
241 |             </scalingScheme>
242 |             <scalingScheme name="by fuel consumption" interpolated="1">
243 |                 <entry color="0.00" threshold="0.00"/>
244 |                 <entry color="10.00" threshold="0.07"/>
245 |             </scalingScheme>
246 |             <scalingScheme name="by noise emissions (Harmonoise)" interpolated="1">
247 |                 <entry color="0.00" threshold="0.00"/>
248 |                 <entry color="10.00" threshold="100.00"/>
249 |             </scalingScheme>
250 |             <scalingScheme name="by global travel time" interpolated="1">
251 |                 <entry color="0.00" threshold="0.00"/>
252 |                 <entry color="10.00" threshold="100.00"/>
253 |             </scalingScheme>
254 |             <scalingScheme name="by global speed percentage" interpolated="1">
255 |                 <entry color="0.00" threshold="0.00"/>
256 |                 <entry color="10.00" threshold="100.00"/>
257 |             </scalingScheme>
258 |             <scalingScheme name="by given length/geometrical length" interpolated="1">
259 |                 <entry color="0.00" threshold="0.00"/>
260 |                 <entry color="10.00" threshold="10.00"/>
261 |             </scalingScheme>
262 |             <scalingScheme name="by loaded weight" interpolated="1">
263 |                 <entry color="-1000.00" threshold="-1000.00"/>
264 |                 <entry color="0.00" threshold="0.00"/>
265 |                 <entry color="1000.00" threshold="1000.00"/>
266 |             </scalingScheme>
267 |             <scalingScheme name="by priority" interpolated="1">
268 |                 <entry color="0.50" threshold="-20.00"/>
269 |                 <entry color="1.00" threshold="0.00"/>
270 |                 <entry color="5.00" threshold="20.00"/>
271 |             </scalingScheme>
272 |             <scalingScheme name="by average speed" interpolated="1">
273 |                 <entry color="0.00" threshold="0.00"/>
274 |                 <entry color="10.00" threshold="41.67"/>
275 |             </scalingScheme>
276 |             <scalingScheme name="by average relative speed" interpolated="1">
277 |                 <entry color="0.00" threshold="0.00"/>
278 |                 <entry color="0.50" threshold="0.50"/>
279 |                 <entry color="2.00" threshold="1.00"/>
280 |                 <entry color="10.00" threshold="2.00"/>
281 |             </scalingScheme>
282 |             <scalingScheme name="by electricity consumption" interpolated="1">
283 |                 <entry color="0.00" threshold="0.00"/>
284 |                 <entry color="10.00" threshold="0.03"/>
285 |             </scalingScheme>
286 |             <scalingScheme name="by insertion-backlog (streetwise)" interpolated="1">
287 |                 <entry color="0.00" threshold="0.00"/>
288 |                 <entry color="1.00" threshold="1.00"/>
289 |                 <entry color="10.00" threshold="10.00"/>
290 |                 <entry color="50.00" threshold="100.00"/>
291 |             </scalingScheme>
292 |             <colorScheme name="uniform (streetwise)">
293 |                 <entry color="black"/>
294 |             </colorScheme>
295 |             <colorScheme name="by selection (streetwise)">
296 |                 <entry color="grey" name="unselected"/>
297 |                 <entry color="0,80,180" name="selected"/>
298 |             </colorScheme>
299 |             <colorScheme name="by purpose (streetwise)">
300 |                 <entry color="0,0,0,0" name="normal"/>
301 |                 <entry color="128,0,128" name="connector"/>
302 |                 <entry color="blue" name="internal"/>
303 |             </colorScheme>
304 |             <colorScheme name="by allowed speed (streetwise)" interpolated="1">
305 |                 <entry color="red" threshold="0.00"/>
306 |                 <entry color="yellow" threshold="8.33"/>
307 |                 <entry color="green" threshold="15.28"/>
308 |                 <entry color="cyan" threshold="22.22"/>
309 |                 <entry color="blue" threshold="33.33"/>
310 |                 <entry color="magenta" threshold="41.67"/>
311 |             </colorScheme>
312 |             <colorScheme name="by current occupancy (streetwise, brutto)" interpolated="1">
313 |                 <entry color="blue" threshold="0.00"/>
314 |                 <entry color="red" threshold="0.95"/>
315 |             </colorScheme>
316 |             <colorScheme name="by current speed (streetwise)" interpolated="1">
317 |                 <entry color="red" threshold="0.00"/>
318 |                 <entry color="yellow" threshold="8.33"/>
319 |                 <entry color="green" threshold="15.28"/>
320 |                 <entry color="cyan" threshold="22.22"/>
321 |                 <entry color="blue" threshold="33.33"/>
322 |                 <entry color="magenta" threshold="41.67"/>
323 |             </colorScheme>
324 |             <colorScheme name="by current flow (streetwise)" interpolated="1">
325 |                 <entry color="blue" threshold="0.00"/>
326 |                 <entry color="red" threshold="5000.00"/>
327 |             </colorScheme>
328 |             <colorScheme name="by relative speed (streetwise)" interpolated="1">
329 |                 <entry color="red" threshold="0.00"/>
330 |                 <entry color="yellow" threshold="0.25"/>
331 |                 <entry color="green" threshold="0.50"/>
332 |                 <entry color="cyan" threshold="0.75"/>
333 |                 <entry color="blue" threshold="1.00"/>
334 |                 <entry color="magenta" threshold="1.25"/>
335 |             </colorScheme>
336 |             <colorScheme name="by routing device assumed speed" interpolated="1">
337 |                 <entry color="red" threshold="0.00"/>
338 |                 <entry color="yellow" threshold="8.33"/>
339 |                 <entry color="green" threshold="15.28"/>
340 |                 <entry color="cyan" threshold="22.22"/>
341 |                 <entry color="blue" threshold="33.33"/>
342 |                 <entry color="magenta" threshold="41.67"/>
343 |             </colorScheme>
344 |             <colorScheme name="by angle">
345 |                 <entry color="yellow"/>
346 |             </colorScheme>
347 |             <colorScheme name="by segments (alternating)">
348 |                 <entry color="blue" name="odd"/>
349 |                 <entry color="red" name="even"/>
350 |             </colorScheme>
351 |             <colorScheme name="by jammed state (segmentwise)">
352 |                 <entry color="green" name="free"/>
353 |                 <entry color="red" name="jammed"/>
354 |             </colorScheme>
355 |             <colorScheme name="by current occupancy (segmentwise, brutto)" interpolated="1">
356 |                 <entry color="blue" threshold="0.00"/>
357 |                 <entry color="red" threshold="0.95"/>
358 |             </colorScheme>
359 |             <colorScheme name="by current speed (segmentwise)" interpolated="1">
360 |                 <entry color="red" threshold="0.00"/>
361 |                 <entry color="yellow" threshold="8.33"/>
362 |                 <entry color="green" threshold="15.28"/>
363 |                 <entry color="cyan" threshold="22.22"/>
364 |                 <entry color="blue" threshold="33.33"/>
365 |                 <entry color="magenta" threshold="41.67"/>
366 |             </colorScheme>
367 |             <colorScheme name="by current flow (segmentwise)" interpolated="1">
368 |                 <entry color="blue" threshold="0.00"/>
369 |                 <entry color="red" threshold="5000.00"/>
370 |             </colorScheme>
371 |             <colorScheme name="by relative speed (segmentwise)" interpolated="1">
372 |                 <entry color="red" threshold="0.00"/>
373 |                 <entry color="yellow" threshold="0.25"/>
374 |                 <entry color="green" threshold="0.50"/>
375 |                 <entry color="cyan" threshold="0.75"/>
376 |                 <entry color="blue" threshold="1.00"/>
377 |                 <entry color="magenta" threshold="1.25"/>
378 |             </colorScheme>
379 |             <colorScheme name="by insertion-backlog (streetwise)" interpolated="1">
380 |                 <entry color="grey" threshold="0.00"/>
381 |                 <entry color="green" threshold="1.00"/>
382 |                 <entry color="yellow" threshold="10.00"/>
383 |                 <entry color="red" threshold="100.00"/>
384 |             </colorScheme>
385 |             <scalingScheme name="uniform">
386 |                 <entry color="1.00"/>
387 |             </scalingScheme>
388 |             <scalingScheme name="by selection (streetwise)">
389 |                 <entry color="0.50" name="unselected"/>
390 |                 <entry color="5.00" name="selected"/>
391 |             </scalingScheme>
392 |             <scalingScheme name="by allowed speed (streetwise)" interpolated="1">
393 |                 <entry color="0.00" threshold="0.00"/>
394 |                 <entry color="10.00" threshold="41.67"/>
395 |             </scalingScheme>
396 |             <scalingScheme name="by current occupancy (streetwise, brutto)" interpolated="1">
397 |                 <entry color="0.00" threshold="0.00"/>
398 |                 <entry color="10.00" threshold="0.95"/>
399 |             </scalingScheme>
400 |             <scalingScheme name="by current speed (streetwise)" interpolated="1">
401 |                 <entry color="0.00" threshold="0.00"/>
402 |                 <entry color="10.00" threshold="41.67"/>
403 |             </scalingScheme>
404 |             <scalingScheme name="by current flow (streetwise)" interpolated="1">
405 |                 <entry color="0.00" threshold="0.00"/>
406 |                 <entry color="20.00" threshold="5000.00"/>
407 |             </scalingScheme>
408 |             <scalingScheme name="by relative speed (streetwise)" interpolated="1">
409 |                 <entry color="0.00" threshold="0.00"/>
410 |                 <entry color="20.00" threshold="1.00"/>
411 |             </scalingScheme>
412 |             <scalingScheme name="by insertion-backlog (streetwise)" interpolated="1">
413 |                 <entry color="0.00" threshold="0.00"/>
414 |                 <entry color="1.00" threshold="1.00"/>
415 |                 <entry color="10.00" threshold="10.00"/>
416 |                 <entry color="50.00" threshold="100.00"/>
417 |             </scalingScheme>
418 |         </edges>
419 |         <vehicles vehicleMode="0" vehicleQuality="0" vehicle_minSize="1.00" vehicle_exaggeration="1.00" vehicle_constantSize="0" showBlinker="1"
420 |                   vehicleName_show="0" vehicleName_size="50.00" vehicleName_color="204,153,0">
421 |             <colorScheme name="given vehicle/type/route color">
422 |                 <entry color="yellow"/>
423 |             </colorScheme>
424 |             <colorScheme name="uniform">
425 |                 <entry color="yellow"/>
426 |             </colorScheme>
427 |             <colorScheme name="given/assigned vehicle color">
428 |                 <entry color="yellow"/>
429 |             </colorScheme>
430 |             <colorScheme name="given/assigned type color">
431 |                 <entry color="yellow"/>
432 |             </colorScheme>
433 |             <colorScheme name="given/assigned route color">
434 |                 <entry color="yellow"/>
435 |             </colorScheme>
436 |             <colorScheme name="depart position as HSV">
437 |                 <entry color="yellow"/>
438 |             </colorScheme>
439 |             <colorScheme name="arrival position as HSV">
440 |                 <entry color="yellow"/>
441 |             </colorScheme>
442 |             <colorScheme name="direction/distance as HSV">
443 |                 <entry color="yellow"/>
444 |             </colorScheme>
445 |             <colorScheme name="by speed" interpolated="1">
446 |                 <entry color="red" threshold="0.00"/>
447 |                 <entry color="yellow" threshold="8.33"/>
448 |                 <entry color="green" threshold="15.28"/>
449 |                 <entry color="cyan" threshold="22.22"/>
450 |                 <entry color="blue" threshold="33.33"/>
451 |                 <entry color="magenta" threshold="41.67"/>
452 |             </colorScheme>
453 |             <colorScheme name="by action step" interpolated="1">
454 |                 <entry color="grey" threshold="0.00"/>
455 |                 <entry color="green" threshold="1.00"/>
456 |                 <entry color="80,160,80" threshold="2.00"/>
457 |             </colorScheme>
458 |             <colorScheme name="by waiting time" interpolated="1">
459 |                 <entry color="blue" threshold="0.00"/>
460 |                 <entry color="cyan" threshold="30.00"/>
461 |                 <entry color="green" threshold="100.00"/>
462 |                 <entry color="yellow" threshold="200.00"/>
463 |                 <entry color="red" threshold="300.00"/>
464 |             </colorScheme>
465 |             <colorScheme name="by accumulated waiting time" interpolated="1">
466 |                 <entry color="blue" threshold="0.00"/>
467 |                 <entry color="cyan" threshold="25.00"/>
468 |                 <entry color="green" threshold="50.00"/>
469 |                 <entry color="yellow" threshold="75.00"/>
470 |                 <entry color="red" threshold="100.00"/>
471 |             </colorScheme>
472 |             <colorScheme name="by time since lane change" interpolated="1">
473 |                 <entry color="189,189,179" threshold="-180.00"/>
474 |                 <entry color="yellow" threshold="-20.00"/>
475 |                 <entry color="red" threshold="-0.00"/>
476 |                 <entry color="179,179,179" threshold="0.00"/>
477 |                 <entry color="blue" threshold="0.00"/>
478 |                 <entry color="cyan" threshold="20.00"/>
479 |                 <entry color="179,189,189" threshold="180.00"/>
480 |             </colorScheme>
481 |             <colorScheme name="by max speed" interpolated="1">
482 |                 <entry color="red" threshold="0.00"/>
483 |                 <entry color="yellow" threshold="8.33"/>
484 |                 <entry color="green" threshold="15.28"/>
485 |                 <entry color="cyan" threshold="22.22"/>
486 |                 <entry color="blue" threshold="33.33"/>
487 |                 <entry color="magenta" threshold="41.67"/>
488 |             </colorScheme>
489 |             <colorScheme name="by CO2 emissions" interpolated="1">
490 |                 <entry color="green" threshold="0.00"/>
491 |                 <entry color="red" threshold="5.00"/>
492 |             </colorScheme>
493 |             <colorScheme name="by CO emissions" interpolated="1">
494 |                 <entry color="green" threshold="0.00"/>
495 |                 <entry color="red" threshold="0.05"/>
496 |             </colorScheme>
497 |             <colorScheme name="by PMx emissions" interpolated="1">
498 |                 <entry color="green" threshold="0.00"/>
499 |                 <entry color="red" threshold="0.01"/>
500 |             </colorScheme>
501 |             <colorScheme name="by NOx emissions" interpolated="1">
502 |                 <entry color="green" threshold="0.00"/>
503 |                 <entry color="red" threshold="0.12"/>
504 |             </colorScheme>
505 |             <colorScheme name="by HC emissions" interpolated="1">
506 |                 <entry color="green" threshold="0.00"/>
507 |                 <entry color="red" threshold="0.02"/>
508 |             </colorScheme>
509 |             <colorScheme name="by fuel consumption" interpolated="1">
510 |                 <entry color="green" threshold="0.00"/>
511 |                 <entry color="red" threshold="0.01"/>
512 |             </colorScheme>
513 |             <colorScheme name="by noise emissions (Harmonoise)" interpolated="1">
514 |                 <entry color="green" threshold="0.00"/>
515 |                 <entry color="red" threshold="100.00"/>
516 |             </colorScheme>
517 |             <colorScheme name="by reroute number" interpolated="1">
518 |                 <entry color="red" threshold="0.00"/>
519 |                 <entry color="yellow" threshold="1.00"/>
520 |                 <entry color="white" threshold="10.00"/>
521 |             </colorScheme>
522 |             <colorScheme name="by selection">
523 |                 <entry color="179,179,179" name="unselected"/>
524 |                 <entry color="0,102,204" name="selected"/>
525 |             </colorScheme>
526 |             <colorScheme name="by offset from best lane" interpolated="1">
527 |                 <entry color="red" threshold="-3.00"/>
528 |                 <entry color="yellow" threshold="-1.00"/>
529 |                 <entry color="179,179,179" threshold="0.00"/>
530 |                 <entry color="cyan" threshold="1.00"/>
531 |                 <entry color="blue" threshold="3.00"/>
532 |             </colorScheme>
533 |             <colorScheme name="by acceleration" interpolated="1">
534 |                 <entry color="64,0,0" threshold="-9.00"/>
535 |                 <entry color="red" threshold="-4.50"/>
536 |                 <entry color="yellow" threshold="-0.10"/>
537 |                 <entry color="179,179,179" threshold="0.00"/>
538 |                 <entry color="cyan" threshold="0.10"/>
539 |                 <entry color="blue" threshold="2.60"/>
540 |                 <entry color="magenta" threshold="5.20"/>
541 |             </colorScheme>
542 |             <colorScheme name="by time gap on lane" interpolated="1">
543 |                 <entry color="179,179,179" threshold="-1.00"/>
544 |                 <entry color="yellow" threshold="0.00"/>
545 |                 <entry color="cyan" threshold="1.00"/>
546 |                 <entry color="blue" threshold="2.00"/>
547 |             </colorScheme>
548 |             <colorScheme name="by depart delay" interpolated="1">
549 |                 <entry color="blue" threshold="0.00"/>
550 |                 <entry color="cyan" threshold="30.00"/>
551 |                 <entry color="green" threshold="100.00"/>
552 |                 <entry color="yellow" threshold="200.00"/>
553 |                 <entry color="red" threshold="300.00"/>
554 |             </colorScheme>
555 |             <colorScheme name="by electricity consumption" interpolated="1">
556 |                 <entry color="green" threshold="0.00"/>
557 |                 <entry color="red" threshold="5.00"/>
558 |             </colorScheme>
559 |             <colorScheme name="by time loss" interpolated="1">
560 |                 <entry color="blue" threshold="0.00"/>
561 |                 <entry color="cyan" threshold="10.00"/>
562 |                 <entry color="green" threshold="60.00"/>
563 |                 <entry color="yellow" threshold="180.00"/>
564 |                 <entry color="red" threshold="900.00"/>
565 |             </colorScheme>
566 |             <colorScheme name="random">
567 |                 <entry color="yellow"/>
568 |             </colorScheme>
569 |         </vehicles>
570 |         <persons personMode="0" personQuality="0" person_minSize="1.00" person_exaggeration="1.00" person_constantSize="0"
571 |                  personName_show="0" personName_size="50.00" personName_color="0,153,204">
572 |             <colorScheme name="given person/type color">
573 |                 <entry color="yellow"/>
574 |             </colorScheme>
575 |             <colorScheme name="uniform">
576 |                 <entry color="yellow"/>
577 |             </colorScheme>
578 |             <colorScheme name="given/assigned person color">
579 |                 <entry color="yellow"/>
580 |             </colorScheme>
581 |             <colorScheme name="given/assigned type color">
582 |                 <entry color="yellow"/>
583 |             </colorScheme>
584 |             <colorScheme name="by speed" interpolated="1">
585 |                 <entry color="red" threshold="0.00"/>
586 |                 <entry color="yellow" threshold="0.69"/>
587 |                 <entry color="green" threshold="1.39"/>
588 |                 <entry color="blue" threshold="2.78"/>
589 |             </colorScheme>
590 |             <colorScheme name="by mode" interpolated="1">
591 |                 <entry color="yellow" threshold="0.00"/>
592 |                 <entry color="blue" threshold="1.00"/>
593 |                 <entry color="red" threshold="2.00"/>
594 |                 <entry color="green" threshold="3.00"/>
595 |             </colorScheme>
596 |             <colorScheme name="by waiting time" interpolated="1">
597 |                 <entry color="blue" threshold="0.00"/>
598 |                 <entry color="cyan" threshold="30.00"/>
599 |                 <entry color="green" threshold="100.00"/>
600 |                 <entry color="yellow" threshold="200.00"/>
601 |                 <entry color="red" threshold="300.00"/>
602 |             </colorScheme>
603 |             <colorScheme name="by selection">
604 |                 <entry color="179,179,179" name="unselected"/>
605 |                 <entry color="0,102,204" name="selected"/>
606 |             </colorScheme>
607 |             <colorScheme name="by angle">
608 |                 <entry color="yellow"/>
609 |             </colorScheme>
610 |             <colorScheme name="random">
611 |                 <entry color="yellow"/>
612 |             </colorScheme>
613 |         </persons>
614 |         <containers containerMode="0" containerQuality="0" container_minSize="1.00" container_exaggeration="1.00" container_constantSize="0"
615 |                  containerName_show="0" containerName_size="50.00" containerName_color="0,153,204">
616 |             <colorScheme name="given person/type color">
617 |                 <entry color="yellow"/>
618 |             </colorScheme>
619 |             <colorScheme name="uniform">
620 |                 <entry color="yellow"/>
621 |             </colorScheme>
622 |             <colorScheme name="given/assigned person color">
623 |                 <entry color="yellow"/>
624 |             </colorScheme>
625 |             <colorScheme name="given/assigned type color">
626 |                 <entry color="yellow"/>
627 |             </colorScheme>
628 |             <colorScheme name="by speed" interpolated="1">
629 |                 <entry color="red" threshold="0.00"/>
630 |                 <entry color="yellow" threshold="0.69"/>
631 |                 <entry color="green" threshold="1.39"/>
632 |                 <entry color="blue" threshold="2.78"/>
633 |             </colorScheme>
634 |             <colorScheme name="by mode" interpolated="1">
635 |                 <entry color="yellow" threshold="0.00"/>
636 |                 <entry color="blue" threshold="1.00"/>
637 |                 <entry color="red" threshold="2.00"/>
638 |                 <entry color="green" threshold="3.00"/>
639 |             </colorScheme>
640 |             <colorScheme name="by waiting time" interpolated="1">
641 |                 <entry color="blue" threshold="0.00"/>
642 |                 <entry color="cyan" threshold="30.00"/>
643 |                 <entry color="green" threshold="100.00"/>
644 |                 <entry color="yellow" threshold="200.00"/>
645 |                 <entry color="red" threshold="300.00"/>
646 |             </colorScheme>
647 |             <colorScheme name="by selection">
648 |                 <entry color="179,179,179" name="unselected"/>
649 |                 <entry color="0,102,204" name="selected"/>
650 |             </colorScheme>
651 |             <colorScheme name="by angle">
652 |                 <entry color="yellow"/>
653 |             </colorScheme>
654 |             <colorScheme name="random">
655 |                 <entry color="yellow"/>
656 |             </colorScheme>
657 |         </containers>
658 |         <junctions junctionMode="0"
659 |                    drawLinkTLIndex_show="0" drawLinkTLIndex_size="50.00" drawLinkTLIndex_color="128,128,255"
660 |                    drawLinkJunctionIndex_show="0" drawLinkJunctionIndex_size="50.00" drawLinkJunctionIndex_color="128,128,255"
661 |                    junctionName_show="0" junctionName_size="50.00" junctionName_color="0,255,128"
662 |                    internalJunctionName_show="0" internalJunctionName_size="50.00" internalJunctionName_color="0,204,128"
663 |                    showLane2Lane="0" drawShape="1" drawCrossingsAndWalkingareas="1" junction_minSize="1.00" junction_exaggeration="1.00" junction_constantSize="0">
664 |             <colorScheme name="uniform">
665 |                 <entry color="black"/>
666 |                 <entry color="150,200,200" name="waterway"/>
667 |             </colorScheme>
668 |             <colorScheme name="by selection">
669 |                 <entry color="grey" name="unselected"/>
670 |                 <entry color="0,80,180" name="selected"/>
671 |             </colorScheme>
672 |             <colorScheme name="by type">
673 |                 <entry color="green" name="traffic_light"/>
674 |                 <entry color="0,128,0" name="traffic_light_unregulated"/>
675 |                 <entry color="yellow" name="priority"/>
676 |                 <entry color="red" name="priority_stop"/>
677 |                 <entry color="blue" name="right_before_left"/>
678 |                 <entry color="cyan" name="allway_stop"/>
679 |                 <entry color="grey" name="district"/>
680 |                 <entry color="magenta" name="unregulated"/>
681 |                 <entry color="black" name="dead_end"/>
682 |                 <entry color="orange" name="rail_signal"/>
683 |                 <entry color="192,128,64" name="zipper"/>
684 |                 <entry color="192,255,192" name="traffic_light_right_on_red"/>
685 |                 <entry color="128,0,128" name="rail_crossing"/>
686 |             </colorScheme>
687 |             <colorScheme name="by height" interpolated="1">
688 |                 <entry color="blue" threshold="-10.00"/>
689 |                 <entry color="grey" threshold="0.00"/>
690 |                 <entry color="red" threshold="10.00"/>
691 |                 <entry color="yellow" threshold="50.00"/>
692 |                 <entry color="green" threshold="100.00"/>
693 |                 <entry color="magenta" threshold="200.00"/>
694 |             </colorScheme>
695 |         </junctions>
696 |         <additionals addMode="0" add_minSize="1.00" add_exaggeration="1.00" add_constantSize="0" addName_show="0" addName_size="50.00" addName_color="255,0,128" addFullName_show="0" addFullName_size="50.00" addFullName_color="255,0,128"/>
697 |         <pois poi_minSize="0.00" poi_exaggeration="1.00" poi_constantSize="0" poiName_show="0" poiName_size="50.00" poiName_color="255,0,128" poiType_show="0" poiType_size="50.00" poiType_color="255,0,128"/>
698 |         <polys poly_minSize="0.00" poly_exaggeration="1.00" poly_constantSize="0" polyName_show="0" polyName_size="50.00" polyName_color="255,0,128" polyType_show="0" polyType_size="50.00" polyType_color="255,0,128"/>
699 |         <legend showSizeLegend="1"/>
700 |     </scheme>
701 | </viewsettings>
702 | 


--------------------------------------------------------------------------------
/envs/env.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Traffic network simulator w/ defined sumo files
  3 | @author: Tianshu Chu
  4 | """
  5 | import logging
  6 | import numpy as np
  7 | import pandas as pd
  8 | import subprocess
  9 | #from sumolib import checkBinary
 10 | import time
 11 | #import traci
 12 | import xml.etree.cElementTree as ET
 13 | 
 14 | DEFAULT_PORT = 8000
 15 | SEC_IN_MS = 1000
 16 | 
 17 | 
 18 | class PhaseSet:
 19 |     def __init__(self, phases):
 20 |         self.num_phase = len(phases)
 21 |         self.num_lane = len(phases[0])
 22 |         self.phases = phases
 23 |         self._init_phase_set()
 24 | 
 25 |     @staticmethod
 26 |     def _get_phase_lanes(phase, signal='r'):
 27 |         phase_lanes = []
 28 |         for i, l in enumerate(phase):
 29 |             if l == signal:
 30 |                 phase_lanes.append(i)
 31 |         return phase_lanes
 32 | 
 33 |     def _init_phase_set(self):
 34 |         self.red_lanes = []
 35 |         for phase in self.phases:
 36 |             self.red_lanes.append(self._get_phase_lanes(phase))
 37 | 
 38 | 
 39 | class PhaseMap:
 40 |     def __init__(self):
 41 |         self.phases = {}
 42 | 
 43 |     def get_phase(self, phase_id, action):
 44 |         # phase_type is either green or yellow
 45 |         return self.phases[phase_id].phases[int(action)]
 46 | 
 47 |     def get_phase_num(self, phase_id):
 48 |         return self.phases[phase_id].num_phase
 49 | 
 50 |     def get_lane_num(self, phase_id):
 51 |         # the lane number is link number
 52 |         return self.phases[phase_id].num_lane
 53 | 
 54 |     def get_red_lanes(self, phase_id, action):
 55 |         # the lane number is link number
 56 |         return self.phases[phase_id].red_lanes[int(action)]
 57 | 
 58 | 
 59 | class Node:
 60 |     def __init__(self, name, neighbor=[], control=False):
 61 |         self.control = control # disabled
 62 |         self.lanes_in = []
 63 |         self.ilds_in = [] # for state
 64 |         self.fingerprint = [] # local policy
 65 |         self.name = name
 66 |         self.neighbor = neighbor
 67 |         self.num_state = 0 # wave and wait should have the same dim
 68 |         self.wave_state = [] # local state
 69 |         self.wait_state = [] # local state
 70 |         self.phase_id = -1
 71 |         self.n_a = 0
 72 |         self.prev_action = -1
 73 | 
 74 | 
 75 | class TrafficSimulator:
 76 |     def __init__(self, config, output_path, is_record, record_stats, port=0):
 77 |         self.name = config.get('scenario')
 78 |         self.seed = config.getint('seed')
 79 |         self.control_interval_sec = config.getint('control_interval_sec')
 80 |         self.yellow_interval_sec = config.getint('yellow_interval_sec')
 81 |         self.episode_length_sec = config.getint('episode_length_sec')
 82 |         self.T = np.ceil(self.episode_length_sec / self.control_interval_sec)
 83 |         self.port = DEFAULT_PORT + port
 84 |         self.sim_thread = port
 85 |         self.obj = config.get('objective')
 86 |         self.data_path = config.get('data_path')
 87 |         self.agent = config.get('agent')
 88 |         self.coop_gamma = config.getfloat('coop_gamma')
 89 |         self.cur_episode = 0
 90 |         self.norms = {'wave': config.getfloat('norm_wave'),
 91 |                       'wait': config.getfloat('norm_wait')}
 92 |         self.clips = {'wave': config.getfloat('clip_wave'),
 93 |                       'wait': config.getfloat('clip_wait')}
 94 |         self.coef_wait = config.getfloat('coef_wait')
 95 |         self.train_mode = True
 96 |         test_seeds = config.get('test_seeds').split(',')
 97 |         test_seeds = [int(s) for s in test_seeds]
 98 |         self._init_map()
 99 |         self.init_data(is_record, record_stats, output_path)
100 |         self.init_test_seeds(test_seeds)
101 |         self._init_sim(self.seed)
102 |         self._init_nodes()
103 |         self.terminate()
104 | 
105 |     def collect_tripinfo(self):
106 |         # read trip xml, has to be called externally to get complete file
107 |         trip_file = self.output_path + ('%s_%s_trip.xml' % (self.name, self.agent))
108 |         tree = ET.ElementTree(file=trip_file)
109 |         for child in tree.getroot():
110 |             cur_trip = child.attrib
111 |             cur_dict = {}
112 |             cur_dict['episode'] = self.cur_episode
113 |             cur_dict['id'] = cur_trip['id']
114 |             cur_dict['depart_sec'] = cur_trip['depart']
115 |             cur_dict['arrival_sec'] = cur_trip['arrival']
116 |             cur_dict['duration_sec'] = cur_trip['duration']
117 |             cur_dict['wait_step'] = cur_trip['waitingCount']
118 |             cur_dict['wait_sec'] = cur_trip['waitingTime']
119 |             self.trip_data.append(cur_dict)
120 |         # delete the current xml
121 |         cmd = 'rm ' + trip_file
122 |         subprocess.check_call(cmd, shell=True)
123 | 
124 |     def get_fingerprint(self):
125 |         policies = []
126 |         for node_name in self.node_names:
127 |             policies.append(self.nodes[node_name].fingerprint)
128 |         return np.array(policies)
129 | 
130 |     def get_neighbor_action(self, action):
131 |         naction = []
132 |         for i in range(self.n_agent):
133 |             naction.append(action[self.neighbor_mask[i] == 1])
134 |         return naction
135 | 
136 |     def init_data(self, is_record, record_stats, output_path):
137 |         self.is_record = is_record
138 |         self.record_stats = record_stats
139 |         self.output_path = output_path
140 |         if self.is_record:
141 |             self.traffic_data = []
142 |             self.control_data = []
143 |             self.trip_data = []
144 |         if self.record_stats:
145 |             self.state_stat = {}
146 |             for state_name in self.state_names:
147 |                 self.state_stat[state_name] = []
148 | 
149 |     def init_test_seeds(self, test_seeds):
150 |         self.test_num = len(test_seeds)
151 |         self.test_seeds = test_seeds
152 | 
153 |     def output_data(self):
154 |         if not self.is_record:
155 |             logging.error('Env: no record to output!')
156 |         control_data = pd.DataFrame(self.control_data)
157 |         control_data.to_csv(self.output_path + ('%s_%s_control.csv' % (self.name, self.agent)))
158 |         traffic_data = pd.DataFrame(self.traffic_data)
159 |         traffic_data.to_csv(self.output_path + ('%s_%s_traffic.csv' % (self.name, self.agent)))
160 |         trip_data = pd.DataFrame(self.trip_data)
161 |         trip_data.to_csv(self.output_path + ('%s_%s_trip.csv' % (self.name, self.agent)))
162 | 
163 |     def reset(self, gui=False, test_ind=0):
164 |         # have to terminate previous sim before calling reset
165 |         self._reset_state()
166 |         if self.train_mode:
167 |             seed = self.seed
168 |         else:
169 |             seed = self.test_seeds[test_ind]
170 |         self._init_sim(seed, gui=gui)
171 |         self.cur_sec = 0
172 |         self.cur_episode += 1
173 |         # initialize fingerprint
174 |         self.update_fingerprint(self._init_policy())
175 |         # next environment random condition should be different
176 |         self.seed += 1
177 |         return self._get_state()
178 | 
179 |     def step(self, action):
180 |         self._set_phase(action, 'yellow', self.yellow_interval_sec)
181 |         self._simulate(self.yellow_interval_sec)
182 |         rest_interval_sec = self.control_interval_sec - self.yellow_interval_sec
183 |         self._set_phase(action, 'green', rest_interval_sec)
184 |         self._simulate(rest_interval_sec)
185 |         state = self._get_state()
186 |         reward = self._measure_reward_step()
187 |         done = False
188 |         if self.cur_sec >= self.episode_length_sec:
189 |             done = True
190 |         global_reward = np.sum(reward)
191 |         if self.is_record:
192 |             action_r = ','.join(['%d' % a for a in action])
193 |             cur_control = {'episode': self.cur_episode,
194 |                            'time_sec': self.cur_sec,
195 |                            'step': self.cur_sec / self.control_interval_sec,
196 |                            'action': action_r,
197 |                            'reward': global_reward}
198 |             self.control_data.append(cur_control)
199 | 
200 |         # use original rewards in test
201 |         if not self.train_mode:
202 |             return state, reward, done, global_reward
203 |         if (self.agent == 'greedy') or (self.coop_gamma < 0):
204 |             reward = global_reward
205 |         return state, reward, done, global_reward
206 | 
207 |     def terminate(self):
208 |         self.sim.close()
209 | 
210 |     def update_fingerprint(self, policy):
211 |         for node_name, pi in zip(self.node_names, policy):
212 |             self.nodes[node_name].fingerprint = pi
213 | 
214 |     def _get_node_phase(self, action, node_name, phase_type):
215 |         node = self.nodes[node_name]
216 |         cur_phase = self.phase_map.get_phase(node.phase_id, action)
217 |         if phase_type == 'green':
218 |             return cur_phase
219 |         prev_action = node.prev_action
220 |         node.prev_action = action
221 |         if (prev_action < 0) or (action == prev_action):
222 |             return cur_phase
223 |         prev_phase = self.phase_map.get_phase(node.phase_id, prev_action)
224 |         switch_reds = []
225 |         switch_greens = []
226 |         for i, (p0, p1) in enumerate(zip(prev_phase, cur_phase)):
227 |             if (p0 in 'Gg') and (p1 == 'r'):
228 |                 switch_reds.append(i)
229 |             elif (p0 in 'r') and (p1 in 'Gg'):
230 |                 switch_greens.append(i)
231 |         if not len(switch_reds):
232 |             return cur_phase
233 |         yellow_phase = list(cur_phase)
234 |         for i in switch_reds:
235 |             yellow_phase[i] = 'y'
236 |         for i in switch_greens:
237 |             yellow_phase[i] = 'r'
238 |         return ''.join(yellow_phase)
239 | 
240 |     def _get_node_phase_id(self, node_name):
241 |         # needs to be overwriteen
242 |         raise NotImplementedError()
243 | 
244 |     def _get_state(self):
245 |         # hard code the state ordering as wave, wait, fp
246 |         state = []
247 |         # measure the most recent state
248 |         self._measure_state_step()
249 | 
250 |         # get the appropriate state vectors
251 |         for node_name in self.node_names:
252 |             node = self.nodes[node_name]
253 |             # wave is required in state
254 |             if self.agent == 'greedy':
255 |                 state.append(node.wave_state)
256 |             else:
257 |                 cur_state = [node.wave_state]
258 | 
259 |                 # include wave states of neighbors
260 |                 if self.agent.startswith('ia2c'):
261 |                     for nnode_name in node.neighbor:
262 |                         cur_state.append(self.nodes[nnode_name].wave_state)
263 | 
264 |                 # include fingerprints of neighbors
265 |                 if self.agent == 'ia2c_fp':
266 |                     for nnode_name in node.neighbor:
267 |                         cur_state.append(self.nodes[nnode_name].fingerprint)
268 | 
269 |                 # include wait state
270 |                 if 'wait' in self.state_names:
271 |                     cur_state.append(node.wait_state)
272 |                 state.append(np.concatenate(cur_state))
273 |         return state
274 | 
275 |     def _init_action_space(self):
276 |         # for local and neighbor coop level
277 |         self.n_agent = self.n_node
278 |         # to simplify the sim, we assume all agents have the same action dim
279 |         phase_id = self._get_node_phase_id('all')
280 |         phase_num = self.phase_map.get_phase_num(phase_id)
281 |         self.n_a = phase_num
282 |         for node_name in self.node_names:
283 |             node = self.nodes[node_name]
284 |             node.phase_id = phase_id
285 |             node.n_a = phase_num
286 | 
287 |     def _init_map(self):
288 |         # needs to be overwriteen
289 |         self.neighbor_map = None
290 |         self.phase_map = None
291 |         self.state_names = None
292 |         raise NotImplementedError()
293 | 
294 |     def _init_nodes(self):
295 |         nodes = {}
296 |         tl_nodes = self.sim.trafficlight.getIDList()
297 |         for node_name in self.node_names:
298 |             if node_name not in tl_nodes:
299 |                 logging.error('node %s can not be found!' % node_name)
300 |                 exit(1)
301 |             neighbor = self.neighbor_map[node_name]
302 |             nodes[node_name] = Node(node_name,
303 |                                     neighbor=neighbor,
304 |                                     control=True)
305 |             # controlled lanes: l:j,i_k
306 |             lanes_in = self.sim.trafficlight.getControlledLanes(node_name)
307 |             nodes[node_name].lanes_in = lanes_in
308 |             ilds_in = []
309 |             for lane_name in lanes_in:
310 |                 ild_name = lane_name
311 |                 if ild_name not in ilds_in:
312 |                     ilds_in.append(ild_name)
313 |             nodes[node_name].ilds_in = ilds_in
314 |         self.nodes = nodes
315 |         s = 'Env: init %d node information:\n' % len(self.node_names)
316 |         for node in self.nodes.values():
317 |             s += node.name + ':\n'
318 |             s += '\tneigbor: %r\n' % node.neighbor
319 |             s += '\tilds_in: %r\n' % node.ilds_in
320 |         logging.info(s)
321 |         self._init_action_space()
322 |         self._init_state_space()
323 | 
324 |     def _init_policy(self):
325 |         return [np.ones(self.n_a) / self.n_a for _ in range(self.n_agent)]
326 | 
327 |     def _init_sim(self, seed, gui=False):
328 |         sumocfg_file = self._init_sim_config(seed)
329 |         if gui:
330 |             app = 'sumo-gui'
331 |         else:
332 |             app = 'sumo'
333 |         command = [checkBinary(app), '-c', sumocfg_file]
334 |         command += ['--seed', str(seed)]
335 |         command += ['--remote-port', str(self.port)]
336 |         command += ['--no-step-log', 'True']
337 |         command += ['--time-to-teleport', '600'] # long teleport for safety
338 |         command += ['--no-warnings', 'True']
339 |         command += ['--duration-log.disable', 'True']
340 |         # collect trip info if necessary
341 |         if self.is_record:
342 |             command += ['--tripinfo-output',
343 |                         self.output_path + ('%s_%s_trip.xml' % (self.name, self.agent))]
344 |         subprocess.Popen(command)
345 |         # wait 1s to establish the traci server
346 |         time.sleep(1)
347 |         self.sim = traci.connect(port=self.port)
348 | 
349 |     def _init_sim_config(self):
350 |         # needs to be overwriteen
351 |         raise NotImplementedError()
352 | 
353 |     def _init_state_space(self):
354 |         self._reset_state()
355 |         n_s_ls = []
356 |         for node_name in self.node_names:
357 |             node = self.nodes[node_name]
358 |             # fingerprint is previous policy
359 |             node.num_fingerprint = self.n_a
360 |             node.num_state = len(node.ilds_in)
361 |             num_wave = node.num_state
362 |             num_wait = 0 if 'wait' not in self.state_names else node.num_state
363 |             if self.agent.startswith('ma2c'):
364 |                 num_n = 1
365 |             else:
366 |                 num_n = 1 + len(node.neighbor)
367 |             n_s_ls.append(num_wait + num_wave * num_n)
368 |         if self.agent.startswith('ma2c'):
369 |             assert len(set(n_s_ls)) == 1
370 |             self.n_s = n_s_ls[0]
371 |         else:
372 |             self.n_s_ls = n_s_ls
373 | 
374 |     def _measure_reward_step(self):
375 |         rewards = []
376 |         for node_name in self.node_names:
377 |             queues = []
378 |             waits = []
379 |             for ild in self.nodes[node_name].ilds_in:
380 |                 if self.obj in ['queue', 'hybrid']:
381 |                     cur_queue = self.sim.lanearea.getLastStepHaltingNumber(ild)
382 |                     queues.append(cur_queue)
383 |                 if self.obj in ['wait', 'hybrid']:
384 |                     max_pos = 0
385 |                     car_wait = 0
386 |                     cur_cars = self.sim.lanearea.getLastStepVehicleIDs(ild)
387 |                     for vid in cur_cars:
388 |                         car_pos = self.sim.vehicle.getLanePosition(vid)
389 |                         if car_pos > max_pos:
390 |                             max_pos = car_pos
391 |                             car_wait = self.sim.vehicle.getWaitingTime(vid)
392 |                     waits.append(car_wait)
393 |             queue = np.sum(np.array(queues)) if len(queues) else 0
394 |             wait = np.sum(np.array(waits)) if len(waits) else 0
395 |             if self.obj == 'queue':
396 |                 reward = - queue
397 |             elif self.obj == 'wait':
398 |                 reward = - wait
399 |             else:
400 |                 reward = - queue - self.coef_wait * wait
401 |             rewards.append(reward)
402 |         return np.array(rewards)
403 | 
404 |     def _measure_state_step(self):
405 |         for node_name in self.node_names:
406 |             node = self.nodes[node_name]
407 |             for state_name in self.state_names:
408 |                 if state_name == 'wave':
409 |                     cur_state = []
410 |                     for ild in node.ilds_in:
411 |                         cur_wave = self.sim.lanearea.getLastStepVehicleNumber(ild)
412 |                         cur_state.append(cur_wave)
413 |                     cur_state = np.array(cur_state)
414 |                 elif state_name == 'wait':
415 |                     cur_state = []
416 |                     for ild in node.ilds_in:
417 |                         max_pos = 0
418 |                         car_wait = 0
419 |                         cur_cars = self.sim.lanearea.getLastStepVehicleIDs(ild)
420 |                         for vid in cur_cars:
421 |                             car_pos = self.sim.vehicle.getLanePosition(vid)
422 |                             if car_pos > max_pos:
423 |                                 max_pos = car_pos
424 |                                 car_wait = self.sim.vehicle.getWaitingTime(vid)
425 |                         cur_state.append(car_wait)
426 |                     cur_state = np.array(cur_state)
427 |                 if self.record_stats:
428 |                     self.state_stat[state_name] += list(cur_state)
429 |                 # normalization
430 |                 norm_cur_state = self._norm_clip_state(cur_state,
431 |                                                        self.norms[state_name],
432 |                                                        self.clips[state_name])
433 |                 if state_name == 'wave':
434 |                     node.wave_state = norm_cur_state
435 |                 else:
436 |                     node.wait_state = norm_cur_state
437 | 
438 |     def _measure_traffic_step(self):
439 |         cars = self.sim.vehicle.getIDList()
440 |         num_tot_car = len(cars)
441 |         num_in_car = self.sim.simulation.getDepartedNumber()
442 |         num_out_car = self.sim.simulation.getArrivedNumber()
443 |         if num_tot_car > 0:
444 |             avg_waiting_time = np.mean([self.sim.vehicle.getWaitingTime(car) for car in cars])
445 |             avg_speed = np.mean([self.sim.vehicle.getSpeed(car) for car in cars])
446 |         else:
447 |             avg_speed = 0
448 |             avg_waiting_time = 0
449 |         # all trip-related measurements are not supported by traci,
450 |         # need to read from outputfile afterwards
451 |         queues = []
452 |         for node_name in self.node_names:
453 |             for ild in self.nodes[node_name].ilds_in:
454 |                 lane_name = ild
455 |                 queues.append(self.sim.lane.getLastStepHaltingNumber(lane_name))
456 |         avg_queue = np.mean(np.array(queues))
457 |         std_queue = np.std(np.array(queues))
458 |         cur_traffic = {'episode': self.cur_episode,
459 |                        'time_sec': self.cur_sec,
460 |                        'number_total_car': num_tot_car,
461 |                        'number_departed_car': num_in_car,
462 |                        'number_arrived_car': num_out_car,
463 |                        'avg_wait_sec': avg_waiting_time,
464 |                        'avg_speed_mps': avg_speed,
465 |                        'std_queue': std_queue,
466 |                        'avg_queue': avg_queue}
467 |         self.traffic_data.append(cur_traffic)
468 | 
469 |     @staticmethod
470 |     def _norm_clip_state(x, norm, clip=-1):
471 |         x = x / norm
472 |         return x if clip < 0 else np.clip(x, 0, clip)
473 | 
474 |     def _reset_state(self):
475 |         for node_name in self.node_names:
476 |             node = self.nodes[node_name]
477 |             # prev action for yellow phase before each switch
478 |             node.prev_action = 0
479 | 
480 |     def _set_phase(self, action, phase_type, phase_duration):
481 |         for node_name, a in zip(self.node_names, list(action)):
482 |             phase = self._get_node_phase(a, node_name, phase_type)
483 |             self.sim.trafficlight.setRedYellowGreenState(node_name, phase)
484 |             self.sim.trafficlight.setPhaseDuration(node_name, phase_duration)
485 | 
486 |     def _simulate(self, num_step):
487 |         # reward = np.zeros(len(self.control_node_names))
488 |         for _ in range(num_step):
489 |             self.sim.simulationStep()
490 |             self.cur_sec += 1
491 |             if self.is_record:
492 |                 self._measure_traffic_step()
493 | 


--------------------------------------------------------------------------------
/envs/large_grid_env.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Particular class of large traffic grid
  3 | @author: Tianshu Chu
  4 | """
  5 | 
  6 | import configparser
  7 | import logging
  8 | import numpy as np
  9 | import matplotlib.pyplot as plt
 10 | import os
 11 | import seaborn as sns
 12 | import time
 13 | from envs.env import PhaseMap, PhaseSet, TrafficSimulator
 14 | from envs.data.build_file import gen_rou_file
 15 | 
 16 | sns.set_color_codes()
 17 | 
 18 | 
 19 | STATE_NAMES = ['wave']
 20 | PHASE_NUM = 5
 21 | 
 22 | 
 23 | class LargeGridPhase(PhaseMap):
 24 |     def __init__(self):
 25 |         phases = ['GGgrrrGGgrrr', 'rrrGrGrrrGrG', 'rrrGGrrrrGGr',
 26 |                   'rrrGGGrrrrrr', 'rrrrrrrrrGGG']
 27 |         self.phases = {PHASE_NUM: PhaseSet(phases)}
 28 | 
 29 | 
 30 | class LargeGridController:
 31 |     def __init__(self, node_names):
 32 |         self.name = 'greedy'
 33 |         self.node_names = node_names
 34 | 
 35 |     def forward(self, obs):
 36 |         actions = []
 37 |         for ob, node_name in zip(obs, self.node_names):
 38 |             actions.append(self.greedy(ob, node_name))
 39 |         return actions
 40 | 
 41 |     def greedy(self, ob, node_name):
 42 |         # hard code the mapping from state to number of cars
 43 |         flows = [ob[0] + ob[3], ob[2] + ob[5], ob[1] + ob[4],
 44 |                  ob[1] + ob[2], ob[4] + ob[5]]
 45 |         return np.argmax(np.array(flows))
 46 | 
 47 | 
 48 | class LargeGridEnv(TrafficSimulator):
 49 |     def __init__(self, config, port=0, output_path='', is_record=False, record_stat=False):
 50 |         self.peak_flow1 = config.getint('peak_flow1')
 51 |         self.peak_flow2 = config.getint('peak_flow2')
 52 |         self.init_density = config.getfloat('init_density')
 53 |         super().__init__(config, output_path, is_record, record_stat, port=port)
 54 | 
 55 |     def _get_node_phase_id(self, node_name):
 56 |         return PHASE_NUM
 57 | 
 58 |     def _init_neighbor_map(self):
 59 |         neighbor_map = {}
 60 |         # corner nodes
 61 |         neighbor_map['nt1'] = ['nt6', 'nt2']
 62 |         neighbor_map['nt5'] = ['nt10', 'nt4']
 63 |         neighbor_map['nt21'] = ['nt22', 'nt16']
 64 |         neighbor_map['nt25'] = ['nt20', 'nt24']
 65 |         # edge nodes
 66 |         neighbor_map['nt2'] = ['nt7', 'nt3', 'nt1']
 67 |         neighbor_map['nt3'] = ['nt8', 'nt4', 'nt2']
 68 |         neighbor_map['nt4'] = ['nt9', 'nt5', 'nt3']
 69 |         neighbor_map['nt22'] = ['nt23', 'nt17', 'nt21']
 70 |         neighbor_map['nt23'] = ['nt24', 'nt18', 'nt22']
 71 |         neighbor_map['nt24'] = ['nt25', 'nt19', 'nt23']
 72 |         neighbor_map['nt10'] = ['nt15', 'nt5', 'nt9']
 73 |         neighbor_map['nt15'] = ['nt20', 'nt10', 'nt14']
 74 |         neighbor_map['nt20'] = ['nt25', 'nt15', 'nt19']
 75 |         neighbor_map['nt6'] = ['nt11', 'nt7', 'nt1']
 76 |         neighbor_map['nt11'] = ['nt16', 'nt12', 'nt6']
 77 |         neighbor_map['nt16'] = ['nt21', 'nt17', 'nt11']
 78 |         # internal nodes
 79 |         for i in [7, 8, 9, 12, 13, 14, 17, 18, 19]:
 80 |             n_node = 'nt' + str(i + 5)
 81 |             s_node = 'nt' + str(i - 5)
 82 |             w_node = 'nt' + str(i - 1)
 83 |             e_node = 'nt' + str(i + 1)
 84 |             cur_node = 'nt' + str(i)
 85 |             neighbor_map[cur_node] = [n_node, e_node, s_node, w_node]
 86 |         self.neighbor_map = neighbor_map
 87 |         self.neighbor_mask = np.zeros((self.n_node, self.n_node))
 88 |         for i in range(self.n_node):
 89 |             for nnode in neighbor_map['nt%d' % (i+1)]:
 90 |                 ni = self.node_names.index(nnode)
 91 |                 self.neighbor_mask[i, ni] = 1
 92 |         logging.info('neighbor mask:\n %r' % self.neighbor_mask)
 93 | 
 94 |     def _init_distance_map(self):
 95 |         block0 = np.array([[0,1,2,3,4],[1,0,1,2,3],[2,1,0,1,2],[3,2,1,0,1],[4,3,2,1,0]])
 96 |         block1 = block0 + 1
 97 |         block2 = block0 + 2
 98 |         block3 = block0 + 3
 99 |         block4 = block0 + 4
100 |         row0 = np.hstack([block0, block1, block2, block3, block4])
101 |         row1 = np.hstack([block1, block0, block1, block2, block3])
102 |         row2 = np.hstack([block2, block1, block0, block1, block2])
103 |         row3 = np.hstack([block3, block2, block1, block0, block1])
104 |         row4 = np.hstack([block4, block3, block2, block1, block0])
105 |         self.distance_mask = np.vstack([row0, row1, row2, row3, row4]) 
106 | 
107 |     def _init_map(self):
108 |         self.node_names = ['nt%d' % i for i in range(1, 26)]
109 |         self.n_node = 25
110 |         self._init_neighbor_map()
111 |         # for spatial discount
112 |         self._init_distance_map()
113 |         self.max_distance = 8
114 |         self.phase_map = LargeGridPhase()
115 |         self.state_names = STATE_NAMES
116 | 
117 |     def _init_sim_config(self, seed):
118 |         return gen_rou_file(self.data_path,
119 |                             self.peak_flow1,
120 |                             self.peak_flow2,
121 |                             self.init_density,
122 |                             seed=seed,
123 |                             thread=self.sim_thread)
124 | 
125 |     def plot_stat(self, rewards):
126 |         self.state_stat['reward'] = rewards
127 |         for name, data in self.state_stat.items():
128 |             fig = plt.figure(figsize=(8, 6))
129 |             plot_cdf(data)
130 |             plt.ylabel(name)
131 |             fig.savefig(self.output_path + self.name + '_' + name + '.png')
132 | 
133 | 
134 | def plot_cdf(X, c='b', label=None):
135 |     sorted_data = np.sort(X)
136 |     yvals = np.arange(len(sorted_data))/float(len(sorted_data)-1)
137 |     plt.plot(sorted_data, yvals, color=c, label=label)
138 | 
139 | if __name__ == '__main__':
140 |     logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s',
141 |                         level=logging.INFO)
142 |     config = configparser.ConfigParser()
143 |     config.read('./config/config_greedy.ini')
144 |     base_dir = './greedy/'
145 |     if not os.path.exists(base_dir):
146 |         os.mkdir(base_dir)
147 |     env = LargeGridEnv(config['ENV_CONFIG'], 2, base_dir, is_record=True, record_stat=True)
148 |     env.train_mode = False
149 |     time.sleep(1)
150 |     controller = LargeGridController(env.node_names)
151 |     rewards = []
152 |     for i in range(env.test_num):
153 |         ob = env.reset(test_ind=i)
154 |         while True:
155 |             next_ob, _, done, reward = env.step(controller.forward(ob))
156 |             rewards.append(reward)
157 |             if done:
158 |                 break
159 |             ob = next_ob
160 |         env.terminate()
161 |         time.sleep(2)
162 |         env.collect_tripinfo()
163 |     env.plot_stat(np.array(rewards))
164 |     logging.info('avg reward: %.2f' % np.mean(rewards))
165 |     env.output_data()
166 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Main function for training and evaluating MARL algorithms in traffic envs
  3 | @author: Tianshu Chu
  4 | """
  5 | 
  6 | import argparse
  7 | import configparser
  8 | import logging
  9 | import tensorflow as tf
 10 | import threading
 11 | from envs.large_grid_env import LargeGridEnv, LargeGridController
 12 | from agents.models import IA2C, IA2C_FP, IA2C_CU, MA2C_NC, MA2C_IC3, MA2C_DIAL
 13 | from utils import (Counter, Trainer, Tester, Evaluator,
 14 |                    check_dir, copy_file, find_file,
 15 |                    init_dir, init_log, init_test_flag,
 16 |                    plot_evaluation, plot_train)
 17 | 
 18 | 
 19 | def parse_args():
 20 |     default_base_dir = '/Users/tchu/Documents/rl_test/deeprl_dist/ma2c_ic3_test'
 21 |     default_config_dir = './config/config_ma2c_ic3.ini'
 22 |     parser = argparse.ArgumentParser()
 23 |     parser.add_argument('--base-dir', type=str, required=False,
 24 |                         default=default_base_dir, help="experiment base dir")
 25 |     subparsers = parser.add_subparsers(dest='option', help="train or evaluate")
 26 |     sp = subparsers.add_parser('train', help='train a single agent under base dir')
 27 |     sp.add_argument('--test-mode', type=str, required=False,
 28 |                     default='after_train_test',
 29 |                     help="test mode during training",
 30 |                     choices=['no_test', 'in_train_test', 'after_train_test', 'all_test'])
 31 |     sp.add_argument('--config-dir', type=str, required=False,
 32 |                     default=default_config_dir, help="experiment config path")
 33 |     sp = subparsers.add_parser('evaluate', help="evaluate and compare agents under base dir")
 34 |     sp.add_argument('--evaluate-seeds', type=str, required=False,
 35 |                     default=','.join([str(i) for i in range(2000, 2500, 10)]),
 36 |                     help="random seeds for evaluation, split by ,")
 37 |     args = parser.parse_args()
 38 |     if not args.option:
 39 |         parser.print_help()
 40 |         exit(1)
 41 |     return args
 42 | 
 43 | 
 44 | def init_env(config, port=0, naive_policy=False):
 45 |     if not naive_policy:
 46 |         return LargeGridEnv(config, port=port)
 47 |     else:
 48 |         env = LargeGridEnv(config, port=port)
 49 |         policy = LargeGridController(env.node_names)
 50 |         return env, policy
 51 | 
 52 | 
 53 | def init_agent(env, config, total_step, seed):
 54 |     if env.agent == 'ia2c':
 55 |         return IA2C(env.n_s_ls, env.n_a, env.neighbor_mask, env.distance_mask, env.coop_gamma,
 56 |                     total_step, config, seed=seed)
 57 |     elif env.agent == 'ia2c_fp':
 58 |         return IA2C_FP(env.n_s_ls, env.n_a, env.neighbor_mask, env.distance_mask, env.coop_gamma,
 59 |                        total_step, config, seed=seed)
 60 |     elif env.agent == 'ma2c_nc':
 61 |         return MA2C_NC(env.n_s, env.n_a, env.neighbor_mask, env.distance_mask, env.coop_gamma,
 62 |                        total_step, config, seed=seed)
 63 |     elif env.agent == 'ma2c_ic3':
 64 |         return MA2C_IC3(env.n_s, env.n_a, env.neighbor_mask, env.distance_mask, env.coop_gamma,
 65 |                         total_step, config, seed=seed)
 66 |     elif env.agent == 'ma2c_cu':
 67 |         return IA2C_CU(env.n_s, env.n_a, env.neighbor_mask, env.distance_mask, env.coop_gamma,
 68 |                        total_step, config, seed=seed)
 69 |     elif env.agent == 'ma2c_dial':
 70 |         return MA2C_DIAL(env.n_s, env.n_a, env.neighbor_mask, env.distance_mask, env.coop_gamma,
 71 |                          total_step, config, seed=seed)
 72 |     else:
 73 |         return None
 74 | 
 75 | 
 76 | def train(args):
 77 |     base_dir = args.base_dir
 78 |     dirs = init_dir(base_dir)
 79 |     init_log(dirs['log'])
 80 |     config_dir = args.config_dir
 81 |     copy_file(config_dir, dirs['data'])
 82 |     config = configparser.ConfigParser()
 83 |     config.read(config_dir)
 84 |     in_test, post_test = init_test_flag(args.test_mode)
 85 | 
 86 |     # init env
 87 |     env = init_env(config['ENV_CONFIG'])
 88 |     logging.info('Training: a dim %d, agent dim: %d' % (env.n_a, env.n_agent))
 89 | 
 90 |     # init step counter
 91 |     total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
 92 |     test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
 93 |     log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
 94 |     global_counter = Counter(total_step, test_step, log_step)
 95 | 
 96 |     # init centralized or multi agent
 97 |     seed = config.getint('ENV_CONFIG', 'seed')
 98 |     model = init_agent(env, config['MODEL_CONFIG'], total_step, seed)
 99 | 
100 |     # disable multi-threading for safe SUMO implementation
101 |     summary_writer = tf.summary.FileWriter(dirs['log'])
102 |     trainer = Trainer(env, model, global_counter, summary_writer, in_test, output_path=dirs['data'])
103 |     trainer.run()
104 | 
105 |     # save model
106 |     final_step = global_counter.cur_step
107 |     logging.info('Training: save final model at step %d ...' % final_step)
108 |     model.save(dirs['model'], final_step)
109 | 
110 |     # post-training test
111 |     if post_test:
112 |         test_dirs = init_dir(base_dir, pathes=['eva_data'])
113 |         evaluator = Evaluator(env, model, test_dirs['eva_data'])
114 |         evaluator.run()
115 | 
116 | 
117 | def evaluate_fn(agent_dir, output_dir, seeds, port):
118 |     agent = agent_dir.split('/')[-1]
119 |     if not check_dir(agent_dir):
120 |         logging.error('Evaluation: %s does not exist!' % agent)
121 |         return
122 |     # load config file for env
123 |     config_dir = find_file(agent_dir + '/data/')
124 |     if not config_dir:
125 |         return
126 |     config = configparser.ConfigParser()
127 |     config.read(config_dir)
128 | 
129 |     # init env
130 |     env, greedy_policy = init_env(config['ENV_CONFIG'], port=port, naive_policy=True)
131 |     env.init_test_seeds(seeds)
132 | 
133 |     # load model for agent
134 |     if agent != 'greedy':
135 |         # init centralized or multi agent
136 |         model = init_agent(env, config['MODEL_CONFIG'], 0, 0)
137 |         if model is None:
138 |             return
139 |         if not model.load(agent_dir + '/model/'):
140 |             return
141 |     else:
142 |         model = greedy_policy
143 |     # collect evaluation data
144 |     evaluator = Evaluator(env, model, output_dir)
145 |     evaluator.run()
146 | 
147 | 
148 | def evaluate(args):
149 |     base_dir = args.base_dir
150 |     dirs = init_dir(base_dir, pathes=['eva_data', 'eva_log'])
151 |     init_log(dirs['eva_log'])
152 |     # enforce the same evaluation seeds across agents
153 |     seeds = args.evaluate_seeds
154 |     logging.info('Evaluation: random seeds: %s' % seeds)
155 |     if not seeds:
156 |         seeds = []
157 |     else:
158 |         seeds = [int(s) for s in seeds.split(',')]
159 |     evaluate_fn(base_dir, dirs['eva_data'], seeds, 1)
160 | 
161 | 
162 | if __name__ == '__main__':
163 |     args = parse_args()
164 |     if args.option == 'train':
165 |         train(args)
166 |     else:
167 |         evaluate(args)
168 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | import logging
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | import time
  6 | import os
  7 | import pandas as pd
  8 | import subprocess
  9 | 
 10 | 
 11 | def check_dir(cur_dir):
 12 |     if not os.path.exists(cur_dir):
 13 |         return False
 14 |     return True
 15 | 
 16 | 
 17 | def copy_file(src_dir, tar_dir):
 18 |     cmd = 'cp %s %s' % (src_dir, tar_dir)
 19 |     subprocess.check_call(cmd, shell=True)
 20 | 
 21 | 
 22 | def find_file(cur_dir, suffix='.ini'):
 23 |     for file in os.listdir(cur_dir):
 24 |         if file.endswith(suffix):
 25 |             return cur_dir + '/' + file
 26 |     logging.error('Cannot find %s file' % suffix)
 27 |     return None
 28 | 
 29 | 
 30 | def init_dir(base_dir, pathes=['log', 'data', 'model']):
 31 |     if not os.path.exists(base_dir):
 32 |         os.mkdir(base_dir)
 33 |     dirs = {}
 34 |     for path in pathes:
 35 |         cur_dir = base_dir + '/%s/' % path
 36 |         if not os.path.exists(cur_dir):
 37 |             os.mkdir(cur_dir)
 38 |         dirs[path] = cur_dir
 39 |     return dirs
 40 | 
 41 | 
 42 | def init_log(log_dir):
 43 |     logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s',
 44 |                         level=logging.INFO,
 45 |                         handlers=[
 46 |                             logging.FileHandler('%s/%d.log' % (log_dir, time.time())),
 47 |                             logging.StreamHandler()
 48 |                         ])
 49 | 
 50 | 
 51 | def init_test_flag(test_mode):
 52 |     if test_mode == 'no_test':
 53 |         return False, False
 54 |     if test_mode == 'in_train_test':
 55 |         return True, False
 56 |     if test_mode == 'after_train_test':
 57 |         return False, True
 58 |     if test_mode == 'all_test':
 59 |         return True, True
 60 |     return False, False
 61 | 
 62 | 
 63 | def plot_train(data_dirs, labels):
 64 |     pass
 65 | 
 66 | def plot_evaluation(data_dirs, labels):
 67 |     pass
 68 | 
 69 | 
 70 | class Counter:
 71 |     def __init__(self, total_step, test_step, log_step):
 72 |         self.counter = itertools.count(1)
 73 |         self.cur_step = 0
 74 |         self.cur_test_step = 0
 75 |         self.total_step = total_step
 76 |         self.test_step = test_step
 77 |         self.log_step = log_step
 78 |         self.stop = False
 79 | 
 80 |     def next(self):
 81 |         self.cur_step = next(self.counter)
 82 |         return self.cur_step
 83 | 
 84 |     def should_test(self):
 85 |         test = False
 86 |         if (self.cur_step - self.cur_test_step) >= self.test_step:
 87 |             test = True
 88 |             self.cur_test_step = self.cur_step
 89 |         return test
 90 | 
 91 |     def should_log(self):
 92 |         return (self.cur_step % self.log_step == 0)
 93 | 
 94 |     def should_stop(self):
 95 |         if self.cur_step >= self.total_step:
 96 |             return True
 97 |         return self.stop
 98 | 
 99 | 
100 | class Trainer():
101 |     def __init__(self, env, model, global_counter, summary_writer, run_test, output_path=None):
102 |         self.cur_step = 0
103 |         self.global_counter = global_counter
104 |         self.env = env
105 |         self.agent = self.env.agent
106 |         self.model = model
107 |         self.sess = self.model.sess
108 |         self.n_step = self.model.n_step
109 |         self.summary_writer = summary_writer
110 |         self.run_test = run_test
111 |         assert self.env.T % self.n_step == 0
112 |         self.data = []
113 |         self.output_path = output_path
114 |         if run_test:
115 |             self.test_num = self.env.test_num
116 |             logging.info('Testing: total test num: %d' % self.test_num)
117 |         self._init_summary()
118 | 
119 |     def _init_summary(self):
120 |         self.train_reward = tf.placeholder(tf.float32, [])
121 |         self.train_summary = tf.summary.scalar('train_reward', self.train_reward)
122 |         self.test_reward = tf.placeholder(tf.float32, [])
123 |         self.test_summary = tf.summary.scalar('test_reward', self.test_reward)
124 | 
125 |     def _add_summary(self, reward, global_step, is_train=True):
126 |         if is_train:
127 |             summ = self.sess.run(self.train_summary, {self.train_reward: reward})
128 |         else:
129 |             summ = self.sess.run(self.test_summary, {self.test_reward: reward})
130 |         self.summary_writer.add_summary(summ, global_step=global_step)
131 | 
132 |     def _get_policy(self, ob, done, mode='train'):
133 |         if self.agent.startswith('ma2c'):
134 |             self.ps = self.env.get_fingerprint()
135 |             policy = self.model.forward(np.array(ob), done, self.ps)
136 |         else:
137 |             policy = self.model.forward(ob, done)
138 |         action = []
139 |         for pi in policy:
140 |             if mode == 'train':
141 |                 action.append(np.random.choice(np.arange(len(pi)), p=pi))
142 |             else:
143 |                 action.append(np.argmax(pi))
144 |         return policy, np.array(action)
145 | 
146 |     def _get_value(self, ob, done, action):
147 |         if self.agent.startswith('ma2c'):
148 |             value = self.model.forward(np.array(ob), done, self.ps, np.array(action), 'v')
149 |         else:
150 |             self.naction = self.env.get_neighbor_action(action)
151 |             value = self.model.forward(ob, done, self.naction, 'v')
152 |         return value
153 | 
154 |     def explore(self, prev_ob, prev_done):
155 |         ob = prev_ob
156 |         done = prev_done
157 |         rewards = []
158 |         for _ in range(self.n_step):
159 |             # pre-decision
160 |             policy, action = self._get_policy(ob, done)
161 |             # post-decision
162 |             value = self._get_value(ob, done, action)
163 |             # transition
164 |             self.env.update_fingerprint(policy)
165 |             next_ob, reward, done, global_reward = self.env.step(action)
166 |             rewards.append(global_reward)
167 |             global_step = self.global_counter.next()
168 |             self.cur_step += 1
169 |             # collect experience
170 |             if self.agent.startswith('ma2c'):
171 |                 self.model.add_transition(ob, self.ps, action, reward, value, done)
172 |             else:
173 |                 self.model.add_transition(ob, self.naction, action, reward, value, done)
174 |             # logging
175 |             if self.global_counter.should_log():
176 |                 logging.info('''Training: global step %d, episode step %d,
177 |                                    ob: %s, a: %s, pi: %s, r: %.2f, train r: %.2f, done: %r''' %
178 |                              (global_step, self.cur_step,
179 |                               str(ob), str(action), str(policy), global_reward, np.mean(reward), done))
180 |             if done:
181 |                 break
182 |             ob = next_ob
183 |         if done:
184 |             R = np.zeros(self.model.n_agent)
185 |         else:
186 |             _, action = self._get_policy(ob, done)
187 |             R = self._get_value(ob, done, action)
188 |         return ob, done, R, rewards
189 | 
190 |     def perform(self, test_ind):
191 |         ob = self.env.reset(test_ind=test_ind)
192 |         rewards = []
193 |         while True:
194 |             if self.agent == 'greedy':
195 |                 action = self.model.forward(ob)
196 |             else:
197 |                 # in on-policy learning, test policy has to be stochastic
198 |                 # policy, action = self._get_policy(ob, False, mode='test')
199 |                 policy, action = self._get_policy(ob, False)
200 |                 self.env.update_fingerprint(policy)
201 |             next_ob, reward, done, global_reward = self.env.step(action)
202 |             rewards.append(global_reward)
203 |             if done:
204 |                 break
205 |             ob = next_ob
206 |         mean_reward = np.mean(np.array(rewards))
207 |         std_reward = np.std(np.array(rewards))
208 |         return mean_reward, std_reward
209 | 
210 |     def run_thread(self, coord):
211 |         '''Multi-threading is disabled'''
212 |         ob = self.env.reset()
213 |         done = False
214 |         cum_reward = 0
215 |         while not coord.should_stop():
216 |             ob, done, R, cum_reward = self.explore(ob, done, cum_reward)
217 |             global_step = self.global_counter.cur_step
218 |             if self.agent.endswith('a2c'):
219 |                 self.model.backward(R, self.summary_writer, global_step)
220 |             else:
221 |                 self.model.backward(self.summary_writer, global_step)
222 |             self.summary_writer.flush()
223 |             if (self.global_counter.should_stop()) and (not coord.should_stop()):
224 |                 self.env.terminate()
225 |                 coord.request_stop()
226 |                 logging.info('Training: stop condition reached!')
227 |                 return
228 | 
229 |     def run(self):
230 |         while not self.global_counter.should_stop():
231 |             # test
232 |             if self.run_test and self.global_counter.should_test():
233 |                 rewards = []
234 |                 global_step = self.global_counter.cur_step
235 |                 self.env.train_mode = False
236 |                 for test_ind in range(self.test_num):
237 |                     mean_reward, std_reward = self.perform(test_ind)
238 |                     self.env.terminate()
239 |                     rewards.append(mean_reward)
240 |                     log = {'agent': self.agent,
241 |                            'step': global_step,
242 |                            'test_id': test_ind,
243 |                            'avg_reward': mean_reward,
244 |                            'std_reward': std_reward}
245 |                     self.data.append(log)
246 |                 avg_reward = np.mean(np.array(rewards))
247 |                 self._add_summary(avg_reward, global_step, is_train=False)
248 |                 logging.info('Testing: global step %d, avg R: %.2f' %
249 |                              (global_step, avg_reward))
250 |             # train
251 |             self.env.train_mode = True
252 |             ob = self.env.reset()
253 |             done = False
254 |             self.cur_step = 0
255 |             rewards = []
256 |             while True:
257 |                 ob, done, R, cur_rewards = self.explore(ob, done)
258 |                 dt = self.env.T - self.cur_step
259 |                 rewards += cur_rewards
260 |                 global_step = self.global_counter.cur_step
261 |                 self.model.backward(R, dt, self.summary_writer, global_step)
262 |                 # termination
263 |                 if done:
264 |                     self.env.terminate()
265 |                     break
266 |             rewards = np.array(rewards)
267 |             mean_reward = np.mean(rewards)
268 |             std_reward = np.std(rewards)
269 |             log = {'agent': self.agent,
270 |                    'step': global_step,
271 |                    'test_id': -1,
272 |                    'avg_reward': mean_reward,
273 |                    'std_reward': std_reward}
274 |             self.data.append(log)
275 |             self._add_summary(mean_reward, global_step)
276 |             self.summary_writer.flush()
277 |         df = pd.DataFrame(self.data)
278 |         df.to_csv(self.output_path + 'train_reward.csv')
279 | 
280 | 
281 | class Tester(Trainer):
282 |     def __init__(self, env, model, global_counter, summary_writer, output_path):
283 |         super().__init__(env, model, global_counter, summary_writer)
284 |         self.env.train_mode = False
285 |         self.test_num = self.env.test_num
286 |         self.output_path = output_path
287 |         self.data = []
288 |         logging.info('Testing: total test num: %d' % self.test_num)
289 | 
290 |     def _init_summary(self):
291 |         self.reward = tf.placeholder(tf.float32, [])
292 |         self.summary = tf.summary.scalar('test_reward', self.reward)
293 | 
294 |     def run_offline(self):
295 |         # enable traffic measurments for offline test
296 |         is_record = True
297 |         record_stats = False
298 |         self.env.cur_episode = 0
299 |         self.env.init_data(is_record, record_stats, self.output_path)
300 |         rewards = []
301 |         for test_ind in range(self.test_num):
302 |             rewards.append(self.perform(test_ind))
303 |             self.env.terminate()
304 |             time.sleep(2)
305 |             self.env.collect_tripinfo()
306 |         avg_reward = np.mean(np.array(rewards))
307 |         logging.info('Offline testing: avg R: %.2f' % avg_reward)
308 |         self.env.output_data()
309 | 
310 |     def run_online(self, coord):
311 |         self.env.cur_episode = 0
312 |         while not coord.should_stop():
313 |             time.sleep(30)
314 |             if self.global_counter.should_test():
315 |                 rewards = []
316 |                 global_step = self.global_counter.cur_step
317 |                 for test_ind in range(self.test_num):
318 |                     cur_reward = self.perform(test_ind)
319 |                     self.env.terminate()
320 |                     rewards.append(cur_reward)
321 |                     log = {'agent': self.agent,
322 |                            'step': global_step,
323 |                            'test_id': test_ind,
324 |                            'reward': cur_reward}
325 |                     self.data.append(log)
326 |                 avg_reward = np.mean(np.array(rewards))
327 |                 self._add_summary(avg_reward, global_step)
328 |                 logging.info('Testing: global step %d, avg R: %.2f' %
329 |                              (global_step, avg_reward))
330 |                 # self.global_counter.update_test(avg_reward)
331 |         df = pd.DataFrame(self.data)
332 |         df.to_csv(self.output_path + 'train_reward.csv')
333 | 
334 | 
335 | class Evaluator(Tester):
336 |     def __init__(self, env, model, output_path):
337 |         self.env = env
338 |         self.model = model
339 |         self.agent = self.env.agent
340 |         self.env.train_mode = False
341 |         self.test_num = self.env.test_num
342 |         self.output_path = output_path
343 | 
344 |     def run(self):
345 |         is_record = True
346 |         record_stats = False
347 |         self.env.cur_episode = 0
348 |         self.env.init_data(is_record, record_stats, self.output_path)
349 |         time.sleep(1)
350 |         for test_ind in range(self.test_num):
351 |             reward, _ = self.perform(test_ind)
352 |             self.env.terminate()
353 |             logging.info('test %i, avg reward %.2f' % (test_ind, reward))
354 |             time.sleep(2)
355 |             self.env.collect_tripinfo()
356 |         self.env.output_data()
357 | 


--------------------------------------------------------------------------------