├── LICENSE ├── README.md ├── agent ├── __pycache__ │ └── runnable_model.cpython-35.pyc ├── asyncrl │ ├── LICENSE │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── a3c.cpython-35.pyc │ │ ├── dqn_head.cpython-35.pyc │ │ ├── fc_net.cpython-35.pyc │ │ ├── nonlinearity.cpython-35.pyc │ │ ├── policy.cpython-35.pyc │ │ ├── policy_output.cpython-35.pyc │ │ ├── run_train_test.cpython-35.pyc │ │ └── weight_init.cpython-35.pyc │ ├── a3c.py │ ├── dqn_head.py │ ├── fc_net.py │ ├── nonlinearity.py │ ├── policy.py │ ├── policy_output.py │ ├── run_train_test.py │ ├── v_function.py │ └── weight_init.py ├── runnable_model.py └── trained_model │ ├── 9600000.h5 │ └── args.txt └── run_RL.py /LICENSE: -------------------------------------------------------------------------------- 1 | This code is partially based on async-rl code https://github.com/muupan/async-rl 2 | 3 | Copyright (c) 2016 Yasuhiro Fujita 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Reinforcement Learning in CARLA 2 | =============== 3 | 4 | We release a trained RL agent from the CoRL-2017 paper "CARLA: An Open Urban Driving Simulator". This is only the inference code, the training code is not released yet. 5 | 6 | The agent was trained with the asynchronous advantage actor-critic (A3C) algorithm by V. Mnih et al. (2016). We build on this open-source Chainer implementation: https://github.com/muupan/async-rl . 7 | 8 | Dependencies 9 | ------- 10 | Tested with: 11 | 12 | - CARLA 0.8.2 13 | - python 3.6 14 | - chainer 1.24.0 15 | - cached-property 1.4.2 16 | - PIL 5.1.0 17 | - opencv 3.3.1 18 | - h5py 2.7.1 19 | 20 | In Anaconda, you can create and activate an environment with installed dependencies (except for CARLA) by running: 21 | ``` 22 | conda create -n carla_rl python=3.6 chainer=1.24.0 cached-property=1.4.2 pillow=5.1.0 opencv=3.3.1 h5py=2.7.1 23 | source activate carla_rl 24 | ``` 25 | 26 | To start evaluation on the CoRL-2017 benchmark: 27 | ------- 28 | - Start a CARLA server on town TownXX (Town01 or Town02) and port PORT (this is to be executed in the CARLA server folder): 29 | ``` 30 | ./CarlaUE4.sh /Game/Maps/TownXX -carla-server -benchmark -fps=10 -windowed -ResX=800 -ResY=600 -carla-world-port=PORT 31 | ``` 32 | - Make sure CARLA client is in your python path, e.g. 
by running: 33 | ``` 34 | export PYTHONPATH=/path/to/CARLA/PythonClient:$PYTHONPATH 35 | ``` 36 | - Run the evaluation: 37 | ``` 38 | python run_RL.py --city-name TownXX --port PORT --corl-2017 39 | ``` 40 | The results will be stored in \_benchmarks_results. 41 | 42 | Paper 43 | ----- 44 | 45 | If you use this code in your research, please cite our CoRL 2017 paper: 46 | ``` 47 | @inproceedings{Dosovitskiy17, 48 | title = { {CARLA}: {An} Open Urban Driving Simulator}, 49 | author = {Alexey Dosovitskiy and German Ros and Felipe Codevilla and Antonio Lopez and Vladlen Koltun}, 50 | booktitle = {Proceedings of the 1st Annual Conference on Robot Learning}, 51 | pages = {1--16}, 52 | year = {2017} 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /agent/__pycache__/runnable_model.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/__pycache__/runnable_model.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/LICENSE: -------------------------------------------------------------------------------- 1 | This code is based on async-rl code https://github.com/muupan/async-rl 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2016 Yasuhiro Fujita 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /agent/asyncrl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__init__.py -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/a3c.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/a3c.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/dqn_head.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/dqn_head.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/fc_net.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/fc_net.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/nonlinearity.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/nonlinearity.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/policy.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/policy.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/policy_output.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/policy_output.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/run_train_test.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/run_train_test.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/__pycache__/weight_init.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/asyncrl/__pycache__/weight_init.cpython-35.pyc -------------------------------------------------------------------------------- /agent/asyncrl/a3c.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from logging import getLogger 3 | import os 4 | 5 | import numpy as np 6 | import chainer 7 | from chainer import serializers 8 | from chainer import functions as F 9 | 10 | logger = getLogger(__name__) 11 | 12 | def check_nans(data,text=''): 13 | for key,val in data.items(): 14 | if np.any(np.isnan(val)): 15 | print(text + 'NaNs in the ' + key + '!!!') 16 | 17 | class A3CModel(chainer.Link): 18 | 19 | def pi_and_v(self, img, meas=None, keep_same_state=False): 20 | raise NotImplementedError() 21 | 22 | def reset_state(self): 23 | pass 24 | 25 | def unchain_backward(self): 26 | pass 27 | 28 | 29 | class A3CActor(object): 30 | def __init__(self, model, random_action_prob=0., input_preprocess=None): 31 | self.model = model 32 | self.random_action_prob = random_action_prob 33 | self.input_preprocess = input_preprocess 34 | self.n_actions = model.n_actions 35 | 36 | def act(self, obs=None, obs_preprocessed=None): 37 | if np.random.rand() > self.random_action_prob: 38 | if not (self.input_preprocess is None): 39 | obs_preprocessed = self.input_preprocess(obs) 40 | img_var = chainer.Variable(np.expand_dims(obs_preprocessed['image'], 0)) 41 | if len(obs_preprocessed['meas']): 42 | meas_var = chainer.Variable(np.expand_dims(obs_preprocessed['meas'], 0)) 43 | check_nans({'meas': meas_var.data}) 44 | else: 45 | meas_var = None 46 | check_nans({'image': img_var.data}) 47 | 48 | pout, _ = self.model.pi_and_v(img_var, meas=meas_var) 49 | action = pout.action_indices[0] 50 | else: 51 | action = np.random.randint(self.n_actions) 52 | return action 53 | 54 | 55 | class A3CTrainer(object): 56 | """A3C: Asynchronous Advantage Actor-Critic. 
57 | 58 | See http://arxiv.org/abs/1602.01783 59 | """ 60 | 61 | def __init__(self, model, optimizer, t_max, gamma, beta=1e-2, 62 | process_idx=0, clip_reward=True, input_preprocess=None, 63 | pi_loss_coef=1.0, v_loss_coef=0.5, 64 | keep_loss_scale_same=False): 65 | 66 | # Globally shared model 67 | self.shared_model = model 68 | 69 | # Thread specific model 70 | self.model = copy.deepcopy(self.shared_model) 71 | 72 | self.optimizer = optimizer 73 | self.t_max = t_max 74 | self.gamma = gamma 75 | self.beta = beta 76 | self.process_idx = process_idx 77 | self.clip_reward = clip_reward 78 | self.input_preprocess = input_preprocess 79 | self.pi_loss_coef = pi_loss_coef 80 | self.v_loss_coef = v_loss_coef 81 | self.keep_loss_scale_same = keep_loss_scale_same 82 | self.goal_vector = None 83 | 84 | self.t = 0 85 | self.t_start = 0 86 | self.past_action_log_prob = {} 87 | self.past_action_entropy = {} 88 | self.past_states = {} 89 | self.past_rewards = {} 90 | self.past_values = {} 91 | 92 | def sync_parameters(self): 93 | copy_param.copy_param(target_link=self.model, 94 | source_link=self.shared_model) 95 | 96 | def act_and_update(self, state, reward, is_state_terminal, train_logger=None): 97 | 98 | if self.clip_reward: 99 | reward = np.clip(reward, -1, 1) 100 | 101 | #print('act.py', 'reward', reward) 102 | 103 | if not is_state_terminal: 104 | obs_preprocessed = self.input_preprocess(state) 105 | img_var = chainer.Variable(np.expand_dims(obs_preprocessed['image'], 0)) 106 | #print(img_var.data[::16,::16]) 107 | if self.input_preprocess.num_meas: 108 | meas_var = chainer.Variable(np.expand_dims(obs_preprocessed['meas'], 0)) 109 | check_nans({'meas': meas_var.data}) 110 | else: 111 | meas_var = None 112 | check_nans({'image': img_var.data, 'reward': reward, 'done': is_state_terminal}) 113 | #print(statevar.shape) 114 | 115 | self.past_rewards[self.t - 1] = reward 116 | 117 | if (is_state_terminal and self.t_start < self.t) \ 118 | or self.t - self.t_start == self.t_max: 119 | 120 | assert self.t_start < self.t 121 | 122 | if is_state_terminal: 123 | R = 0 124 | else: 125 | _, vout = self.model.pi_and_v(img_var, meas=meas_var, keep_same_state=True) 126 | R = float(vout.data) 127 | 128 | pi_loss = 0 129 | v_loss = 0 130 | for i in reversed(range(self.t_start, self.t)): 131 | R *= self.gamma 132 | R += self.past_rewards[i] 133 | v = self.past_values[i] 134 | if self.process_idx == 0: 135 | #print('act.py', 'i', i, 'v',v.data, 'R',R) 136 | logger.debug('s:%s v:%s R:%s', 137 | self.past_states[i].data.sum(), v.data, R) 138 | advantage = R - v 139 | # Accumulate gradients of policy 140 | log_prob = self.past_action_log_prob[i] 141 | entropy = self.past_action_entropy[i] 142 | 143 | # Log probability is increased proportionally to advantage 144 | pi_loss -= log_prob * float(advantage.data) 145 | # Entropy is maximized 146 | pi_loss -= self.beta * entropy 147 | # Accumulate gradients of value function 148 | 149 | v_loss += (v - R) ** 2 / 2 150 | 151 | if self.pi_loss_coef != 1.0: 152 | pi_loss *= self.pi_loss_coef 153 | 154 | if self.v_loss_coef != 1.0: 155 | v_loss *= self.v_loss_coef 156 | 157 | # Normalize the loss of sequences truncated by terminal states 158 | if self.keep_loss_scale_same and \ 159 | self.t - self.t_start < self.t_max: 160 | factor = self.t_max / (self.t - self.t_start) 161 | pi_loss *= factor 162 | v_loss *= factor 163 | 164 | if self.process_idx == 0: 165 | logger.debug('pi_loss:%s v_loss:%s', pi_loss.data, v_loss.data) 166 | 167 | total_loss = pi_loss + F.reshape(v_loss, 
pi_loss.data.shape) 168 | 169 | # Compute gradients using thread-specific model 170 | self.model.zerograds() 171 | total_loss.backward() 172 | # Copy the gradients to the globally shared model 173 | self.shared_model.zerograds() 174 | copy_param.copy_grad( 175 | target_link=self.shared_model, source_link=self.model) 176 | # Update the globally shared model 177 | if self.process_idx == 0: 178 | norm = self.optimizer.compute_grads_norm() 179 | logger.debug('grad norm:%s', norm) 180 | self.optimizer.update() 181 | if self.process_idx == 0: 182 | logger.debug('update') 183 | 184 | if train_logger: 185 | train_logger.log('total loss %f, grad norm %f' % (total_loss.data, self.optimizer.compute_grads_norm())) 186 | 187 | self.sync_parameters() 188 | self.model.unchain_backward() 189 | 190 | self.past_action_log_prob = {} 191 | self.past_action_entropy = {} 192 | self.past_states = {} 193 | self.past_rewards = {} 194 | self.past_values = {} 195 | 196 | self.t_start = self.t 197 | 198 | if not is_state_terminal: 199 | self.past_states[self.t] = img_var 200 | pout, vout = self.model.pi_and_v(img_var, meas=meas_var) 201 | check_nans({'policy': pout.logits.data, 'value': vout.data}) 202 | self.past_action_log_prob[self.t] = pout.sampled_actions_log_probs 203 | self.past_action_entropy[self.t] = pout.entropy 204 | self.past_values[self.t] = vout 205 | self.t += 1 206 | if self.process_idx == 0: 207 | logger.debug('t:%s entropy:%s, probs:%s', 208 | self.t, pout.entropy.data, pout.probs.data) 209 | return pout.action_indices[0] 210 | else: 211 | self.model.reset_state() 212 | return None 213 | 214 | def load_model(self, model_filename): 215 | """Load a network model form a file 216 | """ 217 | serializers.load_hdf5(model_filename, self.model) 218 | copy_param.copy_param(target_link=self.model, 219 | source_link=self.shared_model) 220 | opt_filename = model_filename + '.opt' 221 | if os.path.exists(opt_filename): 222 | print('WARNING: {0} was not found, so loaded only a model'.format( 223 | opt_filename)) 224 | serializers.load_hdf5(model_filename + '.opt', self.optimizer) 225 | 226 | def save_model(self, model_filename): 227 | """Save a network model to a file 228 | """ 229 | serializers.save_hdf5(model_filename, self.model) 230 | serializers.save_hdf5(model_filename + '.opt', self.optimizer) 231 | -------------------------------------------------------------------------------- /agent/asyncrl/dqn_head.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | from chainer import functions as F 3 | from chainer import links as L 4 | from . import nonlinearity 5 | 6 | 7 | class NatureDQNHead(chainer.ChainList): 8 | """DQN's head (Nature version)""" 9 | 10 | #TODO Alexey: bias init used to be 0.1 - does it matter? 
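# Editorial note, not part of the original file: both DQN heads in this module
# assume 84x84 input images. For the Nature head, conv(8x8, stride 4) gives 20x20,
# conv(4x4, stride 2) gives 9x9, and conv(3x3, stride 1) gives 7x7 feature maps,
# so the flattened size feeding the Linear layer is 64 * 7 * 7 = 3136. For the
# NIPS head further below, the two convolutions give 9x9 maps, hence 32 * 9 * 9 = 2592.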
11 | def __init__(self, n_input_channels=None, n_output_channels=512, 12 | nonlinearity_str=None, bias=None): 13 | self.n_input_channels = n_input_channels 14 | self.nonlinearity = nonlinearity.get_from_str(nonlinearity_str) 15 | self.n_output_channels = n_output_channels 16 | 17 | layers = [ 18 | L.Convolution2D(n_input_channels, 32, 8, stride=4, bias=bias), 19 | L.Convolution2D(32, 64, 4, stride=2, bias=bias), 20 | L.Convolution2D(64, 64, 3, stride=1, bias=bias), 21 | L.Linear(3136, n_output_channels, bias=bias), 22 | ] 23 | 24 | super(NatureDQNHead, self).__init__(*layers) 25 | 26 | def __call__(self, state): 27 | h = state 28 | for layer in self: 29 | h = self.nonlinearity(layer(h)) 30 | return h 31 | 32 | 33 | class NIPSDQNHead(chainer.ChainList): 34 | """DQN's head (NIPS workshop version)""" 35 | 36 | def __init__(self, n_input_channels=None, n_output_channels=256, 37 | nonlinearity_str=None, bias=None): 38 | self.n_input_channels = n_input_channels 39 | self.nonlinearity = nonlinearity.get_from_str(nonlinearity_str) 40 | self.n_output_channels = n_output_channels 41 | 42 | layers = [ 43 | L.Convolution2D(n_input_channels, 16, 8, stride=4, bias=bias), 44 | L.Convolution2D(16, 32, 4, stride=2, bias=bias), 45 | L.Linear(2592, n_output_channels, bias=bias), 46 | ] 47 | 48 | super(NIPSDQNHead, self).__init__(*layers) 49 | 50 | def __call__(self, state): 51 | h = state 52 | for layer in self: 53 | h = self.nonlinearity(layer(h)) 54 | return h 55 | -------------------------------------------------------------------------------- /agent/asyncrl/fc_net.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | from chainer import functions as F 3 | from chainer import links as L 4 | from . import nonlinearity 5 | 6 | class FCNet(chainer.ChainList): 7 | 8 | def __init__(self, n_channels_list=[], last_nonlinearity=None, nonlinearity_str=None): 9 | # n_channels is a list of channel sizes. 
The first entry in n_channels_list should be the number of input channels, the last - the number of output channels 10 | #assert len(n_channels_list) >= 2, "The first entry in n_channels_list should be the number of input channels, the last - the number of output channels" 11 | self.last_nonlinearity = last_nonlinearity 12 | self.nonlinearity = nonlinearity.get_from_str(nonlinearity_str) 13 | 14 | layers = [] 15 | for nlayer in range(len(n_channels_list)-1): 16 | layers.append(L.Linear(n_channels_list[nlayer], n_channels_list[nlayer+1])) 17 | super().__init__(*layers) 18 | 19 | def __call__(self, state): 20 | h = state 21 | for layer in self[:-1]: 22 | h = self.nonlinearity(layer(h)) 23 | if len(self) > 0: 24 | if self.last_nonlinearity is True: 25 | h = self.nonlinearity(self[-1](h)) 26 | else: 27 | h = self[-1](h) 28 | return h 29 | -------------------------------------------------------------------------------- /agent/asyncrl/nonlinearity.py: -------------------------------------------------------------------------------- 1 | from chainer import functions as F 2 | import numpy as np 3 | 4 | def get_from_str(nonlinearity_str): 5 | if nonlinearity_str == "relu": 6 | return F.relu 7 | elif nonlinearity_str.startswith("lrelu"): 8 | # string should be in format "lrelu_0.2" , where the number is the negative slope 9 | relu_neg_slope = float(nonlinearity_str.split('_')[1]) 10 | return lambda w: F.leaky_relu(w, slope=relu_neg_slope) 11 | if nonlinearity_str.startswith("elu"): 12 | elu_alpha = float(nonlinearity_str.split('_')[1]) 13 | return lambda w: F.elu(w, alpha=elu_alpha) 14 | else: 15 | raise Exception('Unknown nonlinearity', nonlinearity_str) 16 | 17 | 18 | -------------------------------------------------------------------------------- /agent/asyncrl/policy.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | logger = getLogger(__name__) 3 | 4 | import chainer 5 | from chainer import functions as F 6 | from chainer import links as L 7 | 8 | from . 
import policy_output 9 | 10 | 11 | class Policy(object): 12 | """Abstract policy class.""" 13 | 14 | def __call__(self, state): 15 | raise NotImplementedError 16 | 17 | 18 | class SoftmaxPolicy(Policy): 19 | """Abstract softmax policy class.""" 20 | 21 | def compute_logits(self, state): 22 | """ 23 | Returns: 24 | ~chainer.Variable: logits of actions 25 | """ 26 | raise NotImplementedError 27 | 28 | def __call__(self, state): 29 | return policy_output.SoftmaxPolicyOutput(self.compute_logits(state)) 30 | 31 | 32 | class FCSoftmaxPolicy(chainer.ChainList, SoftmaxPolicy): 33 | """Softmax policy that consists of FC layers and rectifiers""" 34 | 35 | def __init__(self, n_input_channels, n_actions, 36 | n_hidden_layers=0, n_hidden_channels=None): 37 | self.n_input_channels = n_input_channels 38 | self.n_actions = n_actions 39 | self.n_hidden_layers = n_hidden_layers 40 | self.n_hidden_channels = n_hidden_channels 41 | 42 | layers = [] 43 | if n_hidden_layers > 0: 44 | layers.append(L.Linear(n_input_channels, n_hidden_channels)) 45 | for i in range(n_hidden_layers - 1): 46 | layers.append(L.Linear(n_hidden_channels, n_hidden_channels)) 47 | layers.append(L.Linear(n_hidden_channels, n_actions)) 48 | else: 49 | layers.append(L.Linear(n_input_channels, n_actions)) 50 | 51 | super(FCSoftmaxPolicy, self).__init__(*layers) 52 | 53 | def compute_logits(self, state): 54 | h = state 55 | for layer in self[:-1]: 56 | h = F.relu(layer(h)) 57 | h = self[-1](h) 58 | return h 59 | 60 | 61 | class GaussianPolicy(Policy): 62 | """Abstract Gaussian policy class. 63 | """ 64 | pass 65 | -------------------------------------------------------------------------------- /agent/asyncrl/policy_output.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | from chainer import functions as F 3 | from cached_property import cached_property 4 | import numpy as np 5 | 6 | 7 | class PolicyOutput(object): 8 | """Struct that holds policy output and subproducts.""" 9 | pass 10 | 11 | 12 | def _sample_discrete_actions(batch_probs): 13 | """Sample a batch of actions from a batch of action probabilities. 
14 | 15 | Args: 16 | batch_probs (ndarray): batch of action probabilities BxA 17 | Returns: 18 | List consisting of sampled actions 19 | """ 20 | action_indices = [] 21 | 22 | # Subtract a tiny value from probabilities in order to avoid 23 | # "ValueError: sum(pvals[:-1]) > 1.0" in numpy.multinomial 24 | batch_probs = batch_probs - np.finfo(np.float32).epsneg 25 | 26 | for i in range(batch_probs.shape[0]): 27 | histogram = np.random.multinomial(1, batch_probs[i]) 28 | action_indices.append(int(np.nonzero(histogram)[0])) 29 | return action_indices 30 | 31 | 32 | class SoftmaxPolicyOutput(PolicyOutput): 33 | 34 | def __init__(self, logits): 35 | self.logits = logits 36 | 37 | @cached_property 38 | def most_probable_actions(self): 39 | return np.argmax(self.probs.data, axis=1) 40 | 41 | @cached_property 42 | def probs(self): 43 | return F.softmax(self.logits) 44 | 45 | @cached_property 46 | def log_probs(self): 47 | return F.log_softmax(self.logits) 48 | 49 | @cached_property 50 | def action_indices(self): 51 | return _sample_discrete_actions(self.probs.data) 52 | 53 | @cached_property 54 | def sampled_actions_log_probs(self): 55 | return F.select_item( 56 | self.log_probs, 57 | chainer.Variable(np.asarray(self.action_indices, dtype=np.int32))) 58 | 59 | @cached_property 60 | def entropy(self): 61 | return - F.sum(self.probs * self.log_probs, axis=1) 62 | -------------------------------------------------------------------------------- /agent/asyncrl/run_train_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import multiprocessing as mp 3 | 4 | import sys 5 | sys.path.append('/home/adosovit/work/libs/opencv3/lib/python3') 6 | sys.path=['/home/adosovit/work/libs/anaconda3/lib/python3.5/site-packages'] + sys.path 7 | 8 | import chainer 9 | from chainer import links as L 10 | from chainer import functions as F 11 | from chainer import serializers 12 | 13 | import cv2 14 | import numpy as np 15 | import re 16 | import os 17 | import time 18 | 19 | from . import policy, fc_net, dqn_head, a3c, weight_init 20 | 21 | 22 | class InputPreprocessor: 23 | def __init__(self, meas_coeffs=None, n_images_to_accum=None, meas_list=[]): 24 | self.num_meas = len(meas_coeffs) 25 | self.meas_list = meas_list 26 | self.meas_coeffs = np.array(meas_coeffs, dtype=np.float32) 27 | self.n_images_to_accum=n_images_to_accum 28 | self.img_buffer = np.zeros((self.n_images_to_accum,84,84), dtype = np.uint8) 29 | self.step = 0 30 | 31 | def __call__(self, obs): 32 | img = obs['image'] 33 | 34 | if img.shape[:2] == (84,84): 35 | resized = img 36 | else: 37 | resized = cv2.resize(img, (84, 84)) 38 | 39 | resized = np.mean(resized,axis=2,keepdims=True) 40 | all_meas_list = [np.array(obs[m]) if (isinstance(obs[m], list) or isinstance(obs[m],np.ndarray)) else np.array([obs[m]]) for m in self.meas_list] 41 | meas = np.concatenate(all_meas_list).astype(np.float32) 42 | 43 | if self.step == 0: 44 | # at the first step, fill the buffer with identical images 45 | for n in range(self.n_images_to_accum): 46 | self.img_buffer[n] = np.squeeze(resized) 47 | else: 48 | self.img_buffer[self.step % self.n_images_to_accum] = np.squeeze(resized) 49 | self.step += 1 50 | 51 | return {'image': self.img_buffer[np.arange(self.step-1,self.step-1+self.n_images_to_accum) % self.n_images_to_accum].astype(np.float32) / 255. 
-0.5, \ 52 | 'meas': meas.astype(np.float32)*self.meas_coeffs, 53 | 'raw_image': img, 54 | 'raw_meas': meas.astype(np.float32)} 55 | 56 | def reset(self): 57 | self.img_buffer *= 0 58 | self.step = 0 59 | 60 | class A3CFF(chainer.ChainList, a3c.A3CModel): 61 | 62 | def __init__(self, n_actions, model_type='advantage', n_meas_in=None, img_fc_layers=None, meas_fc_layers=None, joint_fc_layers=None, head_arch=None, n_input_channels=None, 63 | nonlinearity_str=None, weight_init_str=None, bias_init=None): 64 | if head_arch == 'nature': 65 | self.dqn_net = dqn_head.NatureDQNHead(n_input_channels=n_input_channels, nonlinearity_str=nonlinearity_str, bias=bias_init) 66 | elif head_arch == 'nips': 67 | self.dqn_net = dqn_head.NIPSDQNHead(n_input_channels=n_input_channels, nonlinearity_str=nonlinearity_str, bias=bias_init) 68 | else: 69 | raise Exception('Unknown head architecture', head_arch) 70 | 71 | self.n_meas_in = n_meas_in 72 | self.input_meas = (meas_fc_layers > 0) 73 | self.n_actions = n_actions 74 | nch = self.dqn_net.n_output_channels 75 | 76 | assert joint_fc_layers >= 1, "Should have at least one joint fc layer" 77 | assert meas_fc_layers >= 1, "Should have at least one meas fc layer" 78 | 79 | self.img_fc = fc_net.FCNet([self.dqn_net.n_output_channels] + [nch]*img_fc_layers, last_nonlinearity=True, nonlinearity_str=nonlinearity_str) 80 | self.meas_fc = fc_net.FCNet([self.n_meas_in] + [nch]*meas_fc_layers, last_nonlinearity=True, nonlinearity_str=nonlinearity_str) 81 | self.joint_fc = fc_net.FCNet([2*nch] + [nch]*joint_fc_layers, last_nonlinearity=True, nonlinearity_str=nonlinearity_str) 82 | self.pi = policy.FCSoftmaxPolicy(nch, n_actions) 83 | self.v = fc_net.FCNet([nch,1], last_nonlinearity=False, nonlinearity_str=nonlinearity_str) 84 | super().__init__(self.dqn_net, self.img_fc, self.meas_fc, self.joint_fc, self.pi, self.v) 85 | weight_init.init_with_str(self, init_str=weight_init_str) 86 | 87 | def pi_and_v(self, img, meas=None, keep_same_state=False): 88 | img_feat = self.img_fc(self.dqn_net(img)) 89 | if self.input_meas: 90 | meas_feat = self.meas_fc(meas) 91 | joint_feat = self.joint_fc(F.concat((img_feat, meas_feat), axis=1)) 92 | else: 93 | raise NotImplementedError("No input measurements currently not supported") 94 | joint_feat = self.joint_fc(img_feat) 95 | 96 | return self.pi(joint_feat), self.v(joint_feat) 97 | 98 | 99 | def get_model(n_actions, n_meas, args): 100 | model = A3CFF(n_actions, n_meas_in=n_meas, img_fc_layers=args.img_fc_layers, meas_fc_layers=args.meas_fc_layers, joint_fc_layers=args.joint_fc_layers, head_arch=args.img_conv_arch, 101 | n_input_channels=args.n_images_to_accum, nonlinearity_str=args.nonlinearity, weight_init_str=args.weight_init, bias_init=args.bias_init) 102 | return model 103 | -------------------------------------------------------------------------------- /agent/asyncrl/v_function.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | from chainer import functions as F 3 | from chainer import links as L 4 | 5 | 6 | class VFunction(object): 7 | pass 8 | 9 | 10 | class FCVFunction(chainer.ChainList, VFunction): 11 | 12 | def __init__(self, n_input_channels, n_hidden_layers=0, 13 | n_hidden_channels=None): 14 | self.n_input_channels = n_input_channels 15 | self.n_hidden_layers = n_hidden_layers 16 | self.n_hidden_channels = n_hidden_channels 17 | 18 | layers = [] 19 | if n_hidden_layers > 0: 20 | layers.append(L.Linear(n_input_channels, n_hidden_channels)) 21 | for i in range(n_hidden_layers 
- 1): 22 | layers.append(L.Linear(n_hidden_channels, n_hidden_channels)) 23 | layers.append(L.Linear(n_hidden_channels, 1)) 24 | else: 25 | layers.append(L.Linear(n_input_channels, 1)) 26 | 27 | super(FCVFunction, self).__init__(*layers) 28 | 29 | def __call__(self, state): 30 | h = state 31 | for layer in self[:-1]: 32 | h = F.relu(layer(h)) 33 | h = self[-1](h) 34 | return h 35 | -------------------------------------------------------------------------------- /agent/asyncrl/weight_init.py: -------------------------------------------------------------------------------- 1 | from chainer import links as L 2 | import numpy as np 3 | 4 | def init_with_str(link, init_str = ""): 5 | if init_str == "xavier": 6 | xavier(link) 7 | elif init_str.startswith("msra"): 8 | # string should be in format "msra_0.2" , where the number is the negative slope 9 | relu_neg_slope = float(init_str.split('_')[1]) 10 | msra(link, relu_neg_slope=relu_neg_slope) 11 | else: 12 | raise Exception('Unknown initialization method', init_str) 13 | 14 | def xavier(link): 15 | # Mimic torch's default parameter initialization 16 | # TODO(muupan): Use chainer's initializers when it is merged 17 | for l in link.links(): 18 | if isinstance(l, L.Linear): 19 | out_channels, in_channels = l.W.data.shape 20 | stdv = 1 / np.sqrt(in_channels) 21 | l.W.data[:] = np.random.uniform(-stdv, stdv, size=l.W.data.shape) 22 | if l.b is not None: 23 | l.b.data[:] = np.random.uniform(-stdv, stdv, 24 | size=l.b.data.shape) 25 | elif isinstance(l, L.Convolution2D): 26 | out_channels, in_channels, kh, kw = l.W.data.shape 27 | stdv = 1 / np.sqrt(in_channels * kh * kw) 28 | l.W.data[:] = np.random.uniform(-stdv, stdv, size=l.W.data.shape) 29 | if l.b is not None: 30 | l.b.data[:] = np.random.uniform(-stdv, stdv, 31 | size=l.b.data.shape) 32 | 33 | def msra(link, relu_neg_slope=0): 34 | # Mimic torch's default parameter initialization 35 | # TODO(muupan): Use chainer's initializers when it is merged 36 | for l in link.links(): 37 | if isinstance(l, L.Linear): 38 | out_channels, in_channels = l.W.data.shape 39 | in_dim = in_channels 40 | elif isinstance(l, L.Convolution2D): 41 | out_channels, in_channels, kh, kw = l.W.data.shape 42 | in_dim = in_channels * kh * kw 43 | else: 44 | return 45 | 46 | stdv = 2 / np.sqrt(in_dim * (1 + relu_neg_slope**2)) 47 | 48 | l.W.data[:] = np.random.uniform(-stdv, stdv, size=l.W.data.shape) 49 | if l.b is not None: 50 | l.b.data[:] = np.random.uniform(-stdv, stdv, 51 | size=l.b.data.shape) 52 | -------------------------------------------------------------------------------- /agent/runnable_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | 6 | from .asyncrl import run_train_test 7 | from .asyncrl import a3c 8 | import numpy as np 9 | from chainer import serializers 10 | 11 | from carla.agent import Agent 12 | from carla.carla_server_pb2 import Control 13 | 14 | class Struct: 15 | def __init__(self, **entries): 16 | self.__dict__.update(entries) 17 | 18 | class A3CAgent(Agent): 19 | def __init__(self, city_name, args_file='', model_file='', n_actions=0, frameskip=1): 20 | Agent.__init__(self) 21 | self.args = self.read_args(args_file) 22 | self.args.model = model_file 23 | self.n_actions = n_actions 24 | self.n_meas = self.compute_n_meas(self.args) 25 | self.args.town_traintest = city_name 26 | self.setup_model(self.n_actions, self.n_meas, self.args) 27 | self.setup_data_preprocessor(self.args) 28 | 
self.frameskip = frameskip 29 | self.step = 0 30 | 31 | def run_step(self, meas, sensory, directions, target): 32 | # print('Step {}'.format(self.step)) 33 | if self.step % self.frameskip == 0: 34 | obs_preprocessed = self.preprocess_data(meas, sensory, directions, target) 35 | action_idx = self.actor.act(obs_preprocessed=obs_preprocessed) 36 | action = self.actions[action_idx] 37 | control = Control() 38 | if self.obs_dict['speed'] < 30.: 39 | control.throttle = action[0] 40 | elif control.throttle > 0.: 41 | control.throttle = 0. 42 | control.steer = action[1] 43 | self.prev_control = control 44 | else: 45 | control = self.prev_control 46 | # print('Repeating control') 47 | self.step += 1 48 | print(control.throttle, control.steer) 49 | return control 50 | 51 | def read_args(self, args_file): 52 | with open(args_file, 'r') as f: 53 | args_dict = eval(f.read()) 54 | return Struct(**args_dict) 55 | 56 | def compute_n_meas(self, args): 57 | modalities = ['accel_x', 'accel_y', 'collision_car', 'collision_gen', 'collision_ped', 'game_timestamp', 'platform_timestamp', 'image', 'ori_x', 'ori_y', 'ori_z', 'player_x', 'player_y', 'intersect_otherlane', 'intersect_offroad', 'speed', 'vector_to_goal', 'distance_to_goal', 'planner_command', 'step'] 58 | dimensionalities = {m: 1 for m in modalities} 59 | dimensionalities.update({'image': None, 'vector_to_goal': 2, 'planner_command': 5}) 60 | n_meas = sum([dimensionalities[m] for m in args.meas_list]) 61 | return n_meas 62 | 63 | def setup_model(self, n_actions, n_meas, args): 64 | self.model = run_train_test.get_model(n_actions, n_meas, args) 65 | serializers.load_hdf5(args.model, self.model) 66 | 67 | if type(self.model).__name__ == 'A3CFF': 68 | self.actor = a3c.A3CActor(self.model, input_preprocess=None, random_action_prob=0.) 
69 | else: 70 | raise Exception('Unknown model type', type(model).__name__) 71 | 72 | if (not hasattr(args, 'carla_action_set')) or args.carla_action_set == '9': 73 | self.actions = [[0., 0.], [-1.,0.], [-0.5,0.], [0.5, 0.], [1.0, 0.], [0., -1.], [0., -0.5], [0., 0.5], [0.,1.]] 74 | elif args.carla_action_set == '13': 75 | self.actions = [[0., 0.], [-1.,0.], [-0.5,0.], [-0.25,0.], [0.25,0.], [0.5, 0.], [1.0, 0.], [0., -1.], [0., -0.5], [0., -0.25], [0., 0.25], [0., 0.5], [0.,1.]] 76 | else: 77 | raise Exception('Unknown args.carla_action_set {}'.format(args.carla_action_set)) 78 | 79 | def setup_data_preprocessor(self, args): 80 | meas_coeffs_dict = {'step': [1/500.], 'vector_to_goal': [1/5000.,1/5000.], 'distance_to_goal': [1/1000.], 'speed': [1/10.], 'collision_gen': [1/500000.], \ 81 | 'collision_ped': [1/100000.], 'collision_car': [1/500000.], 'intersect_offroad': [1.], 'intersect_otherlane': [1.], 'planner_command': [0.5,0.5,0.5,0.5,0.5]} 82 | meas_coeffs = np.concatenate([np.array(meas_coeffs_dict[m]) for m in args.meas_list]).astype(np.float32) 83 | self.input_preprocessor = run_train_test.InputPreprocessor(meas_coeffs=meas_coeffs, n_images_to_accum=args.n_images_to_accum, meas_list=args.meas_list) 84 | 85 | def preprocess_data(self, meas, sensory, planner_command, target): 86 | self.obs_dict = self.data_from_simulator_to_dict(meas, sensory, planner_command, target) 87 | print('Planner', self.obs_dict['planner_command']) 88 | obs_preprocessed = self.input_preprocessor(self.obs_dict) 89 | return obs_preprocessed 90 | 91 | def data_from_simulator_to_dict(self, measurements, sensory, planner_command, target): 92 | modalities = ['accel_x', 'accel_y', 'collision_car', 'collision_gen', 'collision_ped', 'game_timestamp', 'platform_timestamp', 'image', 'ori_x', 'ori_y', 'ori_z', 'player_x', 'player_y', 'intersect_otherlane', 'intersect_offroad', 'speed', 'vector_to_goal', 'distance_to_goal', 'planner_command'] 93 | if measurements is None: 94 | data_dict = {m: None for m in (modalities + ['image', 'goal_pos', 'goal_ori', 'step', 'planner_command'])} 95 | else: 96 | player_measurements = measurements.player_measurements 97 | print(player_measurements) 98 | data_dict = {} 99 | # NOTE we convert new SI CARLA units (from 0.8.0) to old CARLA units (pre-0.8.0) since the model was trained in old CARLA 100 | data_dict['accel_x'] = player_measurements.acceleration.x*100. 101 | data_dict['accel_y'] = player_measurements.acceleration.y*100. 102 | data_dict['collision_car'] = player_measurements.collision_vehicles*100. 103 | data_dict['collision_ped'] = player_measurements.collision_pedestrians*100. 104 | data_dict['collision_gen'] = player_measurements.collision_other*100. 105 | data_dict['ori_x'] = player_measurements.transform.orientation.x 106 | data_dict['ori_y'] = player_measurements.transform.orientation.y 107 | data_dict['ori_z'] = player_measurements.transform.orientation.z 108 | data_dict['player_x'] = player_measurements.transform.location.x*100. 109 | data_dict['player_y'] = player_measurements.transform.location.y*100. 110 | #data_dict['player_z'] = player_measurements.transform.location.z 111 | data_dict['intersect_otherlane'] = player_measurements.intersection_otherlane 112 | data_dict['intersect_offroad'] = player_measurements.intersection_offroad 113 | data_dict['speed'] = player_measurements.forward_speed*3.6 114 | 115 | data_dict['game_timestamp'] = 0. 116 | data_dict['platform_timestamp'] = 0. 
117 | 118 | print(sensory, dir(sensory['CameraRGB'])) 119 | # print(sensory['CameraRGB'].data) 120 | # print(sensory['CameraRGB'].raw_data) 121 | data_dict['image'] = sensory['CameraRGB'].data[:,:,:-1][:,:,::-1] # get rid of A and then revert channels 122 | 123 | data_dict['goal_pos'] = (target.location.x*100., target.location.y*100.) 124 | data_dict['goal_ori'] = (target.orientation.x, target.orientation.y) 125 | data_dict['step'] = 0. 126 | 127 | pos = (data_dict['player_x'], data_dict['player_y'], 22) 128 | ori = (data_dict['ori_x'], data_dict['ori_y'], data_dict['ori_z']) 129 | goal_pos_3d = (data_dict['goal_pos'][0], data_dict['goal_pos'][1], 22) 130 | goal_ori_3d = (data_dict['goal_ori'][0], data_dict['goal_ori'][1], -0.001) 131 | data_dict['planner_command'] = planner_command 132 | 133 | player_pos = np.array([data_dict['player_x'], data_dict['player_y']]) 134 | to_goal = data_dict['goal_pos'] - player_pos 135 | to_agents_coords_matrix = np.array([[data_dict['ori_x'], data_dict['ori_y']], [-data_dict['ori_y'], data_dict['ori_x']]]) 136 | data_dict['vector_to_goal'] = to_agents_coords_matrix.dot(to_goal) 137 | data_dict['distance_to_goal'] = np.sqrt(np.sum(np.abs(to_goal)**2, keepdims=True)) 138 | 139 | planner_command_onehot = np.zeros(5) 140 | if data_dict['planner_command'] is None: 141 | pass 142 | else: 143 | assert (data_dict['planner_command'] in [0.,2.,3.,4.,5.]), 'Got planner command {}. Expected to be one of: 0,2,3,4,5'.format(data_dict['planner_command']) 144 | if data_dict['planner_command'] == 0: 145 | planner_command_onehot[0] = 1. 146 | else: 147 | planner_command_onehot[int(data_dict['planner_command'])-1] = 1. 148 | data_dict['planner_command'] = planner_command_onehot 149 | 150 | if data_dict['distance_to_goal'] > 4000.: 151 | data_dict['distance_to_goal'] = 4000. 
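# Editorial note, not part of the original file: the block above re-encodes the
# scalar planner command, which CARLA 0.8 reports as one of {0, 2, 3, 4, 5}
# (reach-goal, lane-follow, turn-left, turn-right, go-straight), into a 5-dim
# one-hot vector: command 0 maps to index 0 and commands 2-5 map to indices 1-4.
# distance_to_goal is clamped at 4000 (centimetre-scale units), presumably to keep
# the normalized network input bounded.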
152 | 153 | for m in modalities: 154 | assert (m in data_dict), "data_dict should have field {}".format(m) 155 | 156 | return data_dict 157 | -------------------------------------------------------------------------------- /agent/trained_model/9600000.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carla-simulator/reinforcement-learning/b9fe3d417d13ca59d36b187738e40a58abab2765/agent/trained_model/9600000.h5 -------------------------------------------------------------------------------- /agent/trained_model/args.txt: -------------------------------------------------------------------------------- 1 | {'agent_restart_pos_list': [41, 2 | 13, 3 | 32, 4 | 26, 5 | 107, 6 | 79, 7 | 57, 8 | 123, 9 | 82, 10 | 61, 11 | 107, 12 | 92, 13 | 57, 14 | 50, 15 | 136, 16 | 27, 17 | 136, 18 | 107, 19 | 133, 20 | 137, 21 | 50, 22 | 99, 23 | 97, 24 | 43, 25 | 143, 26 | 130, 27 | 50, 28 | 32, 29 | 103, 30 | 47, 31 | 48, 32 | 13, 33 | 141, 34 | 70, 35 | 16, 36 | 88, 37 | 23, 38 | 47, 39 | 100, 40 | 49, 41 | 98, 42 | 29, 43 | 133, 44 | 113, 45 | 144, 46 | 145, 47 | 145, 48 | 134, 49 | 53, 50 | 51, 51 | 104, 52 | 40, 53 | 111, 54 | 78, 55 | 24, 56 | 53, 57 | 15, 58 | 85, 59 | 54, 60 | 49, 61 | 90, 62 | 27, 63 | 73, 64 | 44, 65 | 92, 66 | 21, 67 | 2, 68 | 106, 69 | 25, 70 | 131, 71 | 16, 72 | 22, 73 | 44, 74 | 91, 75 | 136, 76 | 139, 77 | 92, 78 | 109, 79 | 74, 80 | 8, 81 | 8, 82 | 93, 83 | 28, 84 | 26, 85 | 135, 86 | 26, 87 | 15, 88 | 151, 89 | 20, 90 | 11, 91 | 132, 92 | 25, 93 | 108, 94 | 28, 95 | 52, 96 | 10, 97 | 68, 98 | 11, 99 | 141, 100 | 123, 101 | 116, 102 | 7, 103 | 11, 104 | 140, 105 | 101, 106 | 17, 107 | 51, 108 | 18, 109 | 45, 110 | 23, 111 | 24, 112 | 119, 113 | 69, 114 | 106, 115 | 46, 116 | 19, 117 | 128, 118 | 135, 119 | 142, 120 | 118, 121 | 71, 122 | 123, 123 | 91, 124 | 143, 125 | 134, 126 | 11, 127 | 148, 128 | 17, 129 | 56, 130 | 22, 131 | 85, 132 | 125, 133 | 67, 134 | 121, 135 | 42, 136 | 135, 137 | 115, 138 | 134, 139 | 18, 140 | 52, 141 | 135, 142 | 13, 143 | 18, 144 | 55, 145 | 22, 146 | 51, 147 | 45, 148 | 90, 149 | 107, 150 | 145], 151 | 'batch_size': 16, 152 | 'beta': 0.01, 153 | 'bias_init': 0.0, 154 | 'carla_config_files': ['carla_configs/CarlaConfig_weather1.ini', 155 | 'carla_configs/CarlaConfig_weather3.ini', 156 | 'carla_configs/CarlaConfig_weather6.ini', 157 | 'carla_configs/CarlaConfig_weather8.ini'], 158 | 'continue_training': True, 159 | 'each_button_is_an_action': False, 160 | 'environment': 'carla', 161 | 'eps': 0.1, 162 | 'eval_frequency': 100000, 163 | 'eval_n_runs': 20, 164 | 'final_eval_n_runs': 50, 165 | 'frameskip': 1, 166 | 'goal_restart_pos_list': [39, 167 | 22, 168 | 132, 169 | 12, 170 | 97, 171 | 103, 172 | 135, 173 | 28, 174 | 72, 175 | 57, 176 | 122, 177 | 23, 178 | 137, 179 | 16, 180 | 52, 181 | 151, 182 | 58, 183 | 94, 184 | 43, 185 | 90, 186 | 18, 187 | 84, 188 | 84, 189 | 21, 190 | 138, 191 | 127, 192 | 46, 193 | 82, 194 | 10, 195 | 16, 196 | 44, 197 | 29, 198 | 136, 199 | 66, 200 | 93, 201 | 129, 202 | 14, 203 | 18, 204 | 96, 205 | 53, 206 | 96, 207 | 124, 208 | 65, 209 | 109, 210 | 146, 211 | 143, 212 | 59, 213 | 50, 214 | 137, 215 | 56, 216 | 78, 217 | 47, 218 | 106, 219 | 80, 220 | 93, 221 | 139, 222 | 51, 223 | 98, 224 | 132, 225 | 16, 226 | 17, 227 | 89, 228 | 83, 229 | 39, 230 | 15, 231 | 102, 232 | 127, 233 | 104, 234 | 27, 235 | 1, 236 | 91, 237 | 93, 238 | 41, 239 | 53, 240 | 56, 241 | 144, 242 | 17, 243 | 10, 244 | 76, 245 | 24, 246 | 123, 247 | 134, 248 | 12, 249 | 9, 250 | 
142, 251 | 22, 252 | 46, 253 | 129, 254 | 14, 255 | 24, 256 | 130, 257 | 12, 258 | 110, 259 | 14, 260 | 18, 261 | 32, 262 | 63, 263 | 27, 264 | 134, 265 | 151, 266 | 119, 267 | 3, 268 | 19, 269 | 124, 270 | 10, 271 | 46, 272 | 137, 273 | 91, 274 | 49, 275 | 12, 276 | 91, 277 | 99, 278 | 77, 279 | 10, 280 | 41, 281 | 93, 282 | 131, 283 | 90, 284 | 58, 285 | 89, 286 | 77, 287 | 26, 288 | 136, 289 | 141, 290 | 58, 291 | 29, 292 | 64, 293 | 53, 294 | 142, 295 | 15, 296 | 95, 297 | 8, 298 | 71, 299 | 96, 300 | 47, 301 | 139, 302 | 113, 303 | 52, 304 | 137, 305 | 16, 306 | 92, 307 | 19, 308 | 93, 309 | 137, 310 | 91, 311 | 135, 312 | 47, 313 | 25, 314 | 120, 315 | 141], 316 | 'goal_vector_meas_str': [], 317 | 'goal_vector_temporal': [0.0, 318 | 0.0, 319 | 0.0, 320 | 0.0, 321 | 1.0], 322 | 'gradient_clipping': 100.0, 323 | 'host': 'vcl-gpu2-ib', 324 | 'img_conv_arch': 'nature', 325 | 'img_fc_layers': 0, 326 | 'init_model': 'results/72_c71_doom02params/3900000.h5', 327 | 'joint_fc_layers': 1, 328 | 'logging': False, 329 | 'lr': 0.0007, 330 | 'lr_sched': 'linear_10000000', 331 | 'meas_fc_layers': 2, 332 | 'meas_list': ['step', 333 | 'distance_to_goal', 334 | 'speed', 335 | 'collision_gen', 336 | 'collision_car', 337 | 'collision_ped', 338 | 'intersect_otherlane', 339 | 'intersect_offroad', 340 | 'planner_command'], 341 | 'method': 'a3c', 342 | 'mode': 'train', 343 | 'model': None, 344 | 'model_type': 'advantage', 345 | 'momentum1': 0.9, 346 | 'momentum2': 0.99, 347 | 'n_images_to_accum': 2, 348 | 'no_reward_clipping': True, 349 | 'nonlinearity': 'lrelu_0.1', 350 | 'num_start_actions': 10, 351 | 'optimizer': 'rmsprop', 352 | 'outdir': 'results/72_c71_doom02params', 353 | 'port': 2000, 354 | 'processes': 10, 355 | 'profile': False, 356 | 'random_exploration_schedule_baseline': 0.0, 357 | 'random_exploration_schedule_halflife': 100000.0, 358 | 'restart_pos_schedule': [], 359 | 'reward_coeffs_dict': {'collision_car': -2e-05, 360 | 'collision_gen': -2e-05, 361 | 'collision_ped': -2e-05, 362 | 'distance_to_goal': -0.01, 363 | 'intersect_offroad': -2.0, 364 | 'intersect_otherlane': -2.0, 365 | 'speed': 0.05}, 366 | 'reward_coeffs_str': ['distance_to_goal', 367 | '-0.01', 368 | 'speed', 369 | '0.05', 370 | 'collision_gen', 371 | '-0.00002', 372 | 'collision_car', 373 | '-0.00002', 374 | 'collision_ped', 375 | '-0.00002', 376 | 'intersect_otherlane', 377 | '-2.0', 378 | 'intersect_offroad', 379 | '-2.0'], 380 | 'save_frequency': 100000, 381 | 'scenario': None, 382 | 'seed': None, 383 | 'start_actions_type': 'full_gas_random_steer', 384 | 'steps': 10000000, 385 | 't_max': 20, 386 | 'test_random_action_prob': 0, 387 | 'timeout': 300, 388 | 'timesteps_to_predict': [1, 389 | 2, 390 | 4, 391 | 8, 392 | 16], 393 | 'town_traintest': 'train', 394 | 'use_lstm': False, 395 | 'use_planner': True, 396 | 'weight_init': 'msra_0.1', 397 | 'window_visible': False, 398 | 'write_images_to': ''} 399 | -------------------------------------------------------------------------------- /run_RL.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2017 Computer Vision Center (CVC) at the Universitat Autonoma de 4 | # Barcelona (UAB). 5 | # 6 | # This work is licensed under the terms of the MIT license. 7 | # For a copy, see . 
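# Editorial note, not part of the original file: typical usage, per the README, is
# to start a CARLA 0.8.2 server with the -benchmark -fps=10 flags on the chosen
# port and then run, e.g.:
#   python run_RL.py --city-name Town01 --port 2000 --corl-2017
# Results are written to the _benchmarks_results folder.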
8 | 9 | import argparse 10 | import logging 11 | 12 | from carla.driving_benchmark import run_driving_benchmark 13 | from carla.driving_benchmark.experiment_suites import CoRL2017, BasicExperimentSuite 14 | from agent.runnable_model import A3CAgent 15 | 16 | 17 | if __name__ == '__main__': 18 | 19 | argparser = argparse.ArgumentParser(description=__doc__) 20 | argparser.add_argument( 21 | '-v', '--verbose', 22 | action='store_true', 23 | dest='verbose', 24 | help='print some extra status information') 25 | argparser.add_argument( 26 | '-db', '--debug', 27 | action='store_true', 28 | dest='debug', 29 | help='print debug information') 30 | argparser.add_argument( 31 | '--host', 32 | metavar='H', 33 | default='localhost', 34 | help='IP of the host server (default: localhost)') 35 | argparser.add_argument( 36 | '-p', '--port', 37 | metavar='P', 38 | default=2000, 39 | type=int, 40 | help='TCP port to listen to (default: 2000)') 41 | argparser.add_argument( 42 | '-c', '--city-name', 43 | metavar='C', 44 | default='Town01', 45 | help='The town that is going to be used on benchmark' 46 | + '(needs to match active town in server, options: Town01 or Town02)') 47 | argparser.add_argument( 48 | '-n', '--log_name', 49 | metavar='T', 50 | default='test', 51 | help='The name of the log file to be created by the benchmark' 52 | ) 53 | argparser.add_argument( 54 | '--corl-2017', 55 | action='store_true', 56 | help='If you want to benchmark the corl-2017 instead of the Basic one' 57 | ) 58 | argparser.add_argument( 59 | '--continue-experiment', 60 | action='store_true', 61 | help='If you want to continue the experiment with the same name' 62 | ) 63 | 64 | 65 | args = argparser.parse_args() 66 | if args.debug: 67 | log_level = logging.DEBUG 68 | elif args.verbose: 69 | log_level = logging.INFO 70 | else: 71 | log_level = logging.WARNING 72 | 73 | logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level) 74 | logging.info('listening to server %s:%s', args.host, args.port) 75 | 76 | agent = A3CAgent(args.city_name, args_file='agent/trained_model/args.txt', 77 | model_file='agent/trained_model/9600000.h5', n_actions=9, frameskip=1) 78 | 79 | # We instantiate an experiment suite. Basically a set of experiments 80 | # that are going to be evaluated on this benchmark. 81 | if args.corl_2017: 82 | experiment_suite = CoRL2017(args.city_name) 83 | else: 84 | experiment_suite = BasicExperimentSuite(args.city_name) 85 | 86 | # Now actually run the agent_benchmark 87 | run_driving_benchmark(agent, experiment_suite, args.city_name, 88 | args.log_name, args.continue_experiment, 89 | args.host, args.port) 90 | --------------------------------------------------------------------------------
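Appendix (editorial addition, not a file from the repository): the sketch below shows one way the released pieces could be wired together for a standalone forward pass of the trained policy, without a running CARLA server. It mirrors what `A3CAgent` does internally; the dummy observation, the all-ones measurement coefficients and the `Args` helper are illustrative assumptions — the real agent builds its coefficients in `setup_data_preprocessor` and feeds live simulator data.
```python
# Minimal inference sketch (assumes it is run from the repository root and that the
# dependencies from the README are installed; all observation values are dummies).
import numpy as np
from chainer import serializers

from agent.asyncrl import run_train_test, a3c


class Args:
    """Stand-in for the Struct helper in agent/runnable_model.py."""
    def __init__(self, entries):
        self.__dict__.update(entries)


with open('agent/trained_model/args.txt') as f:
    args = Args(eval(f.read()))  # args.txt is a plain Python dict literal

n_actions = 9   # run_RL.py uses the 9-action set
n_meas = 13     # 8 scalar measurements + 5-dim one-hot planner command

model = run_train_test.get_model(n_actions, n_meas, args)
serializers.load_hdf5('agent/trained_model/9600000.h5', model)

# The real agent uses hand-tuned scaling coefficients (see setup_data_preprocessor);
# all-ones coefficients are used here only to keep the example short.
preproc = run_train_test.InputPreprocessor(
    meas_coeffs=np.ones(n_meas, dtype=np.float32),
    n_images_to_accum=args.n_images_to_accum,
    meas_list=args.meas_list)
actor = a3c.A3CActor(model, random_action_prob=0., input_preprocess=None)

# Dummy observation: an 84x84 RGB frame plus the measurements listed in args.meas_list.
obs = {'image': np.zeros((84, 84, 3), dtype=np.uint8)}
obs.update({m: 0.0 for m in args.meas_list if m != 'planner_command'})
obs['planner_command'] = [0.0] * 5

action_idx = actor.act(obs_preprocessed=preproc(obs))
print('sampled action index:', action_idx)
```
The sampled index would then be looked up in the discrete action table defined in `A3CAgent.setup_model` to obtain a throttle and steering value.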